diff --git a/docs/constant_model_loss_transformations/loss_transformations.html b/docs/constant_model_loss_transformations/loss_transformations.html
index 9e3783d7..0eca156f 100644
--- a/docs/constant_model_loss_transformations/loss_transformations.html
+++ b/docs/constant_model_loss_transformations/loss_transformations.html
@@ -477,7 +477,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 </table>
 <p>(Notice how the points for our SLR scatter plot are visually not a great linear fit. We’ll come back to this).</p>
 <p>The code for generating the graphs and models is included below, but we won’t go over it in too much depth.</p>
-<div id="16bb6d33" class="cell" data-execution_count="1">
+<div id="06c34069" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
@@ -492,7 +492,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 <span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>data_linear <span class="op">=</span> dugongs[[<span class="st">"Length"</span>, <span class="st">"Age"</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="554d069c" class="cell" data-execution_count="2">
+<div id="2e04f483" class="cell" data-execution_count="2">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Big font helper</span></span>
@@ -514,7 +514,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 <span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a>plt.style.use(<span class="st">"default"</span>)  <span class="co"># Revert style to default mpl</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="a66b6837" class="cell" data-execution_count="3">
+<div id="aefa3a05" class="cell" data-execution_count="3">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Constant Model + MSE</span></span>
@@ -547,7 +547,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 </div>
 </div>
 </div>
-<div id="04eb9823" class="cell" data-execution_count="4">
+<div id="a80baf65" class="cell" data-execution_count="4">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SLR + MSE</span></span>
@@ -610,7 +610,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 </div>
 </div>
 </div>
-<div id="d2609d98" class="cell" data-execution_count="5">
+<div id="a66a84a5" class="cell" data-execution_count="5">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Predictions</span></span>
@@ -622,7 +622,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 <span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>yhats_linear <span class="op">=</span> [theta_0_hat <span class="op">+</span> theta_1_hat <span class="op">*</span> x <span class="cf">for</span> x <span class="kw">in</span> xs]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="1164e17e" class="cell" data-execution_count="6">
+<div id="e152b28a" class="cell" data-execution_count="6">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Constant Model Rug Plot</span></span>
@@ -652,7 +652,7 @@ <h3 data-number="11.1.2" class="anchored" data-anchor-id="comparing-two-differen
 </div>
 </div>
 </div>
-<div id="7143fdf6" class="cell" data-execution_count="7">
+<div id="dc0fbd93" class="cell" data-execution_count="7">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SLR model scatter plot </span></span>
@@ -766,7 +766,7 @@ <h2 data-number="11.3" class="anchored" data-anchor-id="summary-loss-optimizatio
 <h2 data-number="11.4" class="anchored" data-anchor-id="comparing-loss-functions"><span class="header-section-number">11.4</span> Comparing Loss Functions</h2>
 <p>We’ve now tried our hand at fitting a model under both MSE and MAE cost functions. How do the two results compare?</p>
 <p>Let’s consider a dataset where each entry represents the number of drinks sold at a bubble tea store each day. We’ll fit a constant model to predict the number of drinks that will be sold tomorrow.</p>
-<div id="4c550f61" class="cell" data-execution_count="8">
+<div id="6faa6c13" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>drinks <span class="op">=</span> np.array([<span class="dv">20</span>, <span class="dv">21</span>, <span class="dv">22</span>, <span class="dv">29</span>, <span class="dv">33</span>])</span>
 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>drinks</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="8">
@@ -774,7 +774,7 @@ <h2 data-number="11.4" class="anchored" data-anchor-id="comparing-loss-functions
 </div>
 </div>
 <p>From our derivations above, we know that the optimal model parameter under MSE cost is the mean of the dataset. Under MAE cost, the optimal parameter is the median of the dataset.</p>
-<div id="dfadd5d4" class="cell" data-execution_count="9">
+<div id="0ee29f7d" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>np.mean(drinks), np.median(drinks)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
 <pre><code>(np.float64(25.0), np.float64(22.0))</code></pre>
@@ -784,7 +784,7 @@ <h2 data-number="11.4" class="anchored" data-anchor-id="comparing-loss-functions
 <p><img src="images/error.png" alt="error" width="600"></p>
 <p>Notice that the MSE above is a <strong>smooth</strong> function – it is differentiable at all points, making it easy to minimize using numerical methods. The MAE, in contrast, is not differentiable at each of its “kinks.” We’ll explore how the smoothness of the cost function can impact our ability to apply numerical optimization in a few weeks.</p>
 <p>How do outliers affect each cost function? Imagine we add an extreme value of 1033 to the dataset. The mean of the data increases substantially, while the median is nearly unaffected.</p>
-<div id="e4129863" class="cell" data-execution_count="10">
+<div id="a3bbdb3e" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>drinks_with_outlier <span class="op">=</span> np.append(drinks, <span class="dv">1033</span>)</span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>display(drinks_with_outlier)</span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>np.mean(drinks_with_outlier), np.median(drinks_with_outlier)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -798,7 +798,7 @@ <h2 data-number="11.4" class="anchored" data-anchor-id="comparing-loss-functions
 <p><img src="images/outliers.png" alt="outliers" width="700"></p>
 <p>This means that under the MSE, the optimal model parameter <span class="math inline">\(\hat{\theta}\)</span> is strongly affected by the presence of outliers. Under the MAE, the optimal parameter is not as influenced by outlying data. We can generalize this by saying that the MSE is <strong>sensitive</strong> to outliers, while the MAE is <strong>robust</strong> to outliers.</p>
 <p>Let’s try another experiment. This time, we’ll add an additional, non-outlying datapoint to the data.</p>
-<div id="1c99a887" class="cell" data-execution_count="11">
+<div id="fa322d13" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>drinks_with_additional_observation <span class="op">=</span> np.append(drinks, <span class="dv">35</span>)</span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>drinks_with_additional_observation</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="11">
@@ -870,7 +870,7 @@ <h2 data-number="11.5" class="anchored" data-anchor-id="transformations-to-fit-l
 </ul>
 <p>Other goals in addition to linearity are possible, for example, making data appear more symmetric. Linearity allows us to fit lines to the transformed data.</p>
 <p>Let’s revisit our dugongs example. The lengths and ages are plotted below:</p>
-<div id="e9258db0" class="cell" data-execution_count="12">
+<div id="c1370cf9" class="cell" data-execution_count="12">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co"># `corrcoef` computes the correlation coefficient between two variables</span></span>
@@ -902,7 +902,7 @@ <h2 data-number="11.5" class="anchored" data-anchor-id="transformations-to-fit-l
 <p>Looking at the plot on the left, we see that there is a slight curvature to the data points. Plotting the SLR curve on the right results in a poor fit.</p>
 <p>For SLR to perform well, we’d like there to be a rough linear trend relating <code>"Age"</code> and <code>"Length"</code>. What is making the raw data deviate from a linear relationship? Notice that the data points with <code>"Length"</code> greater than 2.6 have disproportionately high values of <code>"Age"</code> relative to the rest of the data. If we could manipulate these data points to have lower <code>"Age"</code> values, we’d “shift” these points downwards and reduce the curvature in the data. Applying a logarithmic transformation to <span class="math inline">\(y_i\)</span> (that is, taking <span class="math inline">\(\log(\)</span> <code>"Age"</code> <span class="math inline">\()\)</span> ) would achieve just that.</p>
 <p>An important word on <span class="math inline">\(\log\)</span>: in Data 100 (and most upper-division STEM courses), <span class="math inline">\(\log\)</span> denotes the natural logarithm with base <span class="math inline">\(e\)</span>. The base-10 logarithm, where relevant, is indicated by <span class="math inline">\(\log_{10}\)</span>.</p>
-<div id="ba4042f3" class="cell" data-execution_count="13">
+<div id="87ff04ee" class="cell" data-execution_count="13">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>z <span class="op">=</span> np.log(y)</span>
@@ -937,7 +937,7 @@ <h2 data-number="11.5" class="anchored" data-anchor-id="transformations-to-fit-l
 <p><span class="math display">\[\log{(y)} = \theta_0 + \theta_1 x\]</span> <span class="math display">\[y = e^{\theta_0 + \theta_1 x}\]</span> <span class="math display">\[y = (e^{\theta_0})e^{\theta_1 x}\]</span> <span class="math display">\[y = C e^{k x}\]</span></p>
 <p>For some constants <span class="math inline">\(C\)</span> and <span class="math inline">\(k\)</span>.</p>
 <p><span class="math inline">\(y\)</span> is an <em>exponential</em> function of <span class="math inline">\(x\)</span>. Applying an exponential fit to the untransformed variables corroborates this finding.</p>
-<div id="182e7e2b" class="cell" data-execution_count="14">
+<div id="9cb7ab92" class="cell" data-execution_count="14">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>plt.figure(dpi<span class="op">=</span><span class="dv">120</span>, figsize<span class="op">=</span>(<span class="dv">4</span>, <span class="dv">3</span>))</span>
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf
index 37839016..c171c477 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf
index b8a03a0a..bd75b662 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf
index adbd1819..6ea932d4 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf
index dcbde775..07d3fdee 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf
index ea9d21db..1c8ae2f1 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf
index 0355acea..ef448700 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf
index 94ed9624..04dc09f5 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf differ
diff --git a/docs/eda/eda.html b/docs/eda/eda.html
index ff41ca75..dd8a7edb 100644
--- a/docs/eda/eda.html
+++ b/docs/eda/eda.html
@@ -361,7 +361,7 @@ <h2 id="toc-title">Data Cleaning and EDA</h2>
 </header>
 
 
-<div id="dbe287d8" class="cell" data-execution_count="1">
+<div id="6c2ab44a" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
@@ -426,7 +426,7 @@ <h3 data-number="5.1.1" class="anchored" data-anchor-id="file-formats"><span cla
 <section id="csv" class="level4" data-number="5.1.1.1">
 <h4 data-number="5.1.1.1" class="anchored" data-anchor-id="csv"><span class="header-section-number">5.1.1.1</span> CSV</h4>
 <p>CSVs, which stand for <strong>Comma-Separated Values</strong>, are a common tabular data format. In the past two <code>pandas</code> lectures, we briefly touched on the idea of file format: the way data is encoded in a file for storage. Specifically, our <code>elections</code> and <code>babynames</code> datasets were stored and loaded as CSVs:</p>
-<div id="60d1b115" class="cell" data-execution_count="2">
+<div id="b9124a2b" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>pd.read_csv(<span class="st">"data/elections.csv"</span>).head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="2">
 <div>
@@ -497,7 +497,7 @@ <h4 data-number="5.1.1.1" class="anchored" data-anchor-id="csv"><span class="hea
 </div>
 </div>
 <p>To better understand the properties of a CSV, let’s take a look at the first few rows of the raw data file to see what it looks like before being loaded into a <code>DataFrame</code>. We’ll use the <code>repr()</code> function to return the raw string with its special characters:</p>
-<div id="2419475f" class="cell" data-execution_count="3">
+<div id="2701d3cc" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">"data/elections.csv"</span>, <span class="st">"r"</span>) <span class="im">as</span> table:</span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>    i <span class="op">=</span> <span class="dv">0</span></span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> row <span class="kw">in</span> table:</span>
@@ -518,7 +518,7 @@ <h4 data-number="5.1.1.1" class="anchored" data-anchor-id="csv"><span class="hea
 <h4 data-number="5.1.1.2" class="anchored" data-anchor-id="tsv"><span class="header-section-number">5.1.1.2</span> TSV</h4>
 <p>Another common file type is <strong>TSV (Tab-Separated Values)</strong>. In a TSV, records are still delimited by a newline <code>\n</code>, while fields are delimited by the <code>\t</code> tab character.</p>
 <p>Let’s check out the first few rows of the raw TSV file. Again, we’ll use the <code>repr()</code> function so that <code>print</code> shows the special characters.</p>
-<div id="2926ff39" class="cell" data-execution_count="4">
+<div id="74729269" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">"data/elections.txt"</span>, <span class="st">"r"</span>) <span class="im">as</span> table:</span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>    i <span class="op">=</span> <span class="dv">0</span></span>
 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> row <span class="kw">in</span> table:</span>
@@ -534,7 +534,7 @@ <h4 data-number="5.1.1.2" class="anchored" data-anchor-id="tsv"><span class="hea
 </div>
 </div>
 <p>TSVs can be loaded into <code>pandas</code> using <code>pd.read_csv</code>. We’ll need to specify the <strong>delimiter</strong> with the parameter <code>sep='\t'</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html">(documentation)</a>.</p>
-<div id="2711e309" class="cell" data-execution_count="5">
+<div id="26161d20" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>pd.read_csv(<span class="st">"data/elections.txt"</span>, sep<span class="op">=</span><span class="st">'</span><span class="ch">\t</span><span class="st">'</span>).head(<span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="5">
 <div>
@@ -591,7 +591,7 @@ <h4 data-number="5.1.1.2" class="anchored" data-anchor-id="tsv"><span class="hea
 <section id="json" class="level4" data-number="5.1.1.3">
 <h4 data-number="5.1.1.3" class="anchored" data-anchor-id="json"><span class="header-section-number">5.1.1.3</span> JSON</h4>
 <p><strong>JSON (JavaScript Object Notation)</strong> files behave similarly to Python dictionaries. A raw JSON is shown below.</p>
-<div id="ed082ef9" class="cell" data-execution_count="6">
+<div id="578517f6" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">"data/elections.json"</span>, <span class="st">"r"</span>) <span class="im">as</span> table:</span>
 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>    i <span class="op">=</span> <span class="dv">0</span></span>
 <span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> row <span class="kw">in</span> table:</span>
@@ -621,7 +621,7 @@ <h4 data-number="5.1.1.3" class="anchored" data-anchor-id="json"><span class="he
 </div>
 </div>
 <p>JSON files can be loaded into <code>pandas</code> using <code>pd.read_json</code>.</p>
-<div id="898ec8bc" class="cell" data-execution_count="7">
+<div id="daf2b8e0" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>pd.read_json(<span class="st">'data/elections.json'</span>).head(<span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="7">
 <div>
@@ -676,7 +676,7 @@ <h4 data-number="5.1.1.3" class="anchored" data-anchor-id="json"><span class="he
 <section id="eda-with-json-berkeley-covid-19-data" class="level5" data-number="5.1.1.3.1">
 <h5 data-number="5.1.1.3.1" class="anchored" data-anchor-id="eda-with-json-berkeley-covid-19-data"><span class="header-section-number">5.1.1.3.1</span> EDA with JSON: Berkeley COVID-19 Data</h5>
 <p>The City of Berkeley Open Data <a href="https://data.cityofberkeley.info/Health/COVID-19-Confirmed-Cases/xn6j-b766">website</a> has a dataset with COVID-19 Confirmed Cases among Berkeley residents by date. Let’s download the file and save it as a JSON (note the source URL file type is also a JSON). In the interest of reproducible data science, we will download the data programmatically. We have defined some helper functions in the <a href="https://ds100.org/fa23/resources/assets/lectures/lec05/lec05-eda.html"><code>ds100_utils.py</code></a> file so that we can reuse them in many different notebooks.</p>
-<div id="cd4fdbbb" class="cell" data-execution_count="8">
+<div id="5bb99fa4" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> ds100_utils <span class="im">import</span> fetch_and_cache</span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>covid_file <span class="op">=</span> fetch_and_cache(</span>
@@ -695,7 +695,7 @@ <h5 data-number="5.1.1.3.1" class="anchored" data-anchor-id="eda-with-json-berke
 <h6 data-number="5.1.1.3.1.1" class="anchored" data-anchor-id="file-size"><span class="header-section-number">5.1.1.3.1.1</span> File Size</h6>
 <p>Let’s start our analysis by getting a rough estimate of the size of the dataset to inform the tools we use to view the data. For relatively small datasets, we can use a text editor or spreadsheet. For larger datasets, more programmatic exploration or distributed computing tools may be more fitting. Here we will use <code>Python</code> tools to probe the file.</p>
 <p>Since these seem to be text files, let’s investigate the number of lines, which often corresponds to the number of records.</p>
-<div id="25abb829" class="cell" data-execution_count="9">
+<div id="49da59ef" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> os</span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(covid_file, <span class="st">"is"</span>, os.path.getsize(covid_file) <span class="op">/</span> <span class="fl">1e6</span>, <span class="st">"MB"</span>)</span>
@@ -713,7 +713,7 @@ <h6 data-number="5.1.1.3.1.2" class="anchored" data-anchor-id="unix-commands"><s
 <p>As part of the EDA workflow, Unix commands can come in very handy. In fact, there’s an entire book called <a href="https://datascienceatthecommandline.com/">“Data Science at the Command Line”</a> that explores this idea in depth! In Jupyter/IPython, you can prefix lines with <code>!</code> to execute arbitrary Unix commands, and within those lines, you can refer to Python variables and expressions with the syntax <code>{expr}</code>.</p>
 <p>Here, we use the <code>ls</code> command to list files, using the <code>-lh</code> flags, which request “long format with information in human-readable form.” We also use the <code>wc</code> command for “word count,” but with the <code>-l</code> flag, which asks for line counts instead of words.</p>
 <p>These two give us the same information as the code above, albeit in a slightly different form:</p>
-<div id="a1dff16c" class="cell" data-execution_count="10">
+<div id="00e03f57" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>ls <span class="op">-</span>lh {covid_file}</span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>wc <span class="op">-</span>l {covid_file}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
@@ -725,7 +725,7 @@ <h6 data-number="5.1.1.3.1.2" class="anchored" data-anchor-id="unix-commands"><s
 <section id="file-contents" class="level6" data-number="5.1.1.3.1.3">
 <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><span class="header-section-number">5.1.1.3.1.3</span> File Contents</h6>
 <p>Let’s explore the data format using <code>Python</code>.</p>
-<div id="03c482f0" class="cell" data-execution_count="11">
+<div id="d7db68e9" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(covid_file, <span class="st">"r"</span>) <span class="im">as</span> f:</span>
 <span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> i, row <span class="kw">in</span> <span class="bu">enumerate</span>(f):</span>
 <span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>        <span class="bu">print</span>(<span class="bu">repr</span>(row)) <span class="co"># print raw strings</span></span>
@@ -739,7 +739,7 @@ <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><s
 </div>
 </div>
 <p>We can use the <code>head</code> Unix command (which is where <code>pandas</code>’ <code>head</code> method comes from!) to see the first few lines of the file:</p>
-<div id="bdcb195a" class="cell" data-execution_count="12">
+<div id="450c6269" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>head <span class="op">-</span><span class="dv">5</span> {covid_file}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>{
@@ -750,21 +750,21 @@ <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><s
 </div>
 </div>
 <p>In order to load the JSON file into <code>pandas</code>, let’s first do some EDA with Python’s <code>json</code> package to understand the particular structure of this JSON file so that we can decide what (if anything) to load into <code>pandas</code>. Python has relatively good support for JSON data since it closely matches the internal Python object model. In the following cell we import the entire JSON datafile into a Python dictionary using the <code>json</code> package.</p>
-<div id="169f01e6" class="cell" data-execution_count="13">
+<div id="326012d7" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> json</span>
 <span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(covid_file, <span class="st">"rb"</span>) <span class="im">as</span> f:</span>
 <span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a>    covid_json <span class="op">=</span> json.load(f)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>The <code>covid_json</code> variable is now a dictionary encoding the data in the file:</p>
-<div id="e3a3af35" class="cell" data-execution_count="14">
+<div id="7e8b18da" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="bu">type</span>(covid_json)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="14">
 <pre><code>dict</code></pre>
 </div>
 </div>
 <p>We can examine what keys are in the top level JSON object by listing out the keys.</p>
-<div id="f8094669" class="cell" data-execution_count="15">
+<div id="91a1acd7" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>covid_json.keys()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="15">
 <pre><code>dict_keys(['meta', 'data'])</code></pre>
@@ -772,14 +772,14 @@ <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><s
 </div>
 <p><strong>Observation</strong>: The JSON dictionary contains a <code>meta</code> key which likely refers to metadata (data about the data). Metadata is often maintained with the data and can be a good source of additional information.</p>
 <p>We can investigate the metadata further by examining the keys associated with the metadata.</p>
-<div id="c36fc3cb" class="cell" data-execution_count="16">
+<div id="d27ec7e8" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>covid_json[<span class="st">'meta'</span>].keys()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="16">
 <pre><code>dict_keys(['view'])</code></pre>
 </div>
 </div>
 <p>The <code>meta</code> key contains another dictionary called <code>view</code>. This likely refers to metadata about a particular “view” of some underlying database. We will learn more about views when we study SQL later in the class.</p>
-<div id="cd91f967" class="cell" data-execution_count="17">
+<div id="da303665" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>covid_json[<span class="st">'meta'</span>][<span class="st">'view'</span>].keys()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="17">
 <pre><code>dict_keys(['id', 'name', 'assetType', 'attribution', 'averageRating', 'category', 'createdAt', 'description', 'displayType', 'downloadCount', 'hideFromCatalog', 'hideFromDataJson', 'newBackend', 'numberOfComments', 'oid', 'provenance', 'publicationAppendEnabled', 'publicationDate', 'publicationGroup', 'publicationStage', 'rowsUpdatedAt', 'rowsUpdatedBy', 'tableId', 'totalTimesRated', 'viewCount', 'viewLastModified', 'viewType', 'approvals', 'columns', 'grants', 'metadata', 'owner', 'query', 'rights', 'tableAuthor', 'tags', 'flags'])</code></pre>
@@ -799,7 +799,7 @@ <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><s
     | -&gt; columns
     ...</code></pre>
 <p>There is a key called description in the view sub dictionary. This likely contains a description of the data:</p>
-<div id="4fe95b36" class="cell" data-execution_count="18">
+<div id="baac63bb" class="cell" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(covid_json[<span class="st">'meta'</span>][<span class="st">'view'</span>][<span class="st">'description'</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
 <pre><code>Counts of confirmed COVID-19 cases among Berkeley residents by date.</code></pre>
@@ -809,7 +809,7 @@ <h6 data-number="5.1.1.3.1.3" class="anchored" data-anchor-id="file-contents"><s
 <section id="examining-the-data-field-for-records" class="level6" data-number="5.1.1.3.1.4">
 <h6 data-number="5.1.1.3.1.4" class="anchored" data-anchor-id="examining-the-data-field-for-records"><span class="header-section-number">5.1.1.3.1.4</span> Examining the Data Field for Records</h6>
 <p>We can look at a few entries in the <code>data</code> field. This is what we’ll load into <code>pandas</code>.</p>
-<div id="53f42011" class="cell" data-execution_count="19">
+<div id="cb344697" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb34"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(<span class="dv">3</span>):</span>
 <span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>    <span class="bu">print</span>(<span class="ss">f"</span><span class="sc">{</span>i<span class="sc">:03}</span><span class="ss"> | </span><span class="sc">{</span>covid_json[<span class="st">'data'</span>][i]<span class="sc">}</span><span class="ss">"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stdout">
@@ -820,7 +820,7 @@ <h6 data-number="5.1.1.3.1.4" class="anchored" data-anchor-id="examining-the-dat
 </div>
 <p>Observations:</p><ul><li>These look like equal-length records, so maybe <code>data</code> is a table!</li><li>But what does each of the values in the record mean? Where can we find column headers?</li></ul>
 <p>For that, we’ll need the <code>columns</code> key in the metadata dictionary. This returns a list:</p>
-<div id="62d46a27" class="cell" data-execution_count="20">
+<div id="965a2f30" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb36"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="bu">type</span>(covid_json[<span class="st">'meta'</span>][<span class="st">'view'</span>][<span class="st">'columns'</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="20">
 <pre><code>list</code></pre>
@@ -847,7 +847,7 @@ <h6 data-number="5.1.1.3.1.6" class="anchored" data-anchor-id="loading-covid-dat
 <li><p>Remove columns that have no metadata description. This would be a bad idea in general, but here we remove these columns since the above analysis suggests they are unlikely to contain useful information.</p></li>
 <li><p>Examine the <code>tail</code> of the table.</p></li>
 </ol>
-<div id="c11b9a63" class="cell" data-execution_count="21">
+<div id="6249dcee" class="cell" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Load the data from JSON and assign column titles</span></span>
 <span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>covid <span class="op">=</span> pd.DataFrame(</span>
 <span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a>    covid_json[<span class="st">'data'</span>],</span>
@@ -960,7 +960,7 @@ <h6 data-number="5.1.1.3.1.6" class="anchored" data-anchor-id="loading-covid-dat
 <h3 data-number="5.1.2" class="anchored" data-anchor-id="primary-and-foreign-keys"><span class="header-section-number">5.1.2</span> Primary and Foreign Keys</h3>
 <p>Last time, we introduced <code>.merge</code> as the <code>pandas</code> method for joining multiple <code>DataFrame</code>s together. In our discussion of joins, we touched on the idea of using a “key” to determine what rows should be merged from each table. Let’s take a moment to examine this idea more closely.</p>
 <p>The <strong>primary key</strong> is the column or set of columns in a table that <em>uniquely</em> determine the values of the remaining columns. It can be thought of as the unique identifier for each individual row in the table. For example, a table of Data 100 students might use each student’s Cal ID as the primary key.</p>
-<div id="7233c1d2" class="cell" data-execution_count="22">
+<div id="dbdd755f" class="cell" data-execution_count="22">
 <div class="cell-output cell-output-display" data-execution_count="22">
 <div>
 
@@ -1006,7 +1006,7 @@ <h3 data-number="5.1.2" class="anchored" data-anchor-id="primary-and-foreign-key
 </div>
 </div>
 <p>The <strong>foreign key</strong> is the column or set of columns in a table that reference primary keys in other tables. Knowing a dataset’s foreign keys can be useful when assigning the <code>left_on</code> and <code>right_on</code> parameters of <code>.merge</code>. In the table of office hour tickets below, <code>"Cal ID"</code> is a foreign key referencing the previous table.</p>
-<div id="4c0a3af6" class="cell" data-execution_count="23">
+<div id="a0f16981" class="cell" data-execution_count="23">
 <div class="cell-output cell-output-display" data-execution_count="23">
 <div>
 
@@ -1099,7 +1099,7 @@ <h3 data-number="5.2.3" class="anchored" data-anchor-id="temporality"><span clas
 <section id="temporality-with-pandas-dt-accessors" class="level4" data-number="5.2.3.1">
 <h4 data-number="5.2.3.1" class="anchored" data-anchor-id="temporality-with-pandas-dt-accessors"><span class="header-section-number">5.2.3.1</span> Temporality with <code>pandas</code>’ <code>dt</code> accessors</h4>
 <p>Let’s briefly look at how we can use <code>pandas</code>’ <code>dt</code> accessors to work with dates/times in a dataset using the dataset you’ll see in Lab 3: the Berkeley PD Calls for Service dataset.</p>
-<div id="a7cfc445" class="cell" data-execution_count="24">
+<div id="dd1eeedd" class="cell" data-execution_count="24">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb39"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>calls <span class="op">=</span> pd.read_csv(<span class="st">"data/Berkeley_PD_-_Calls_for_Service.csv"</span>)</span>
@@ -1206,11 +1206,11 @@ <h4 data-number="5.2.3.1" class="anchored" data-anchor-id="temporality-with-pand
 <p>Looks like there are three columns with dates/times: <code>EVENTDT</code>, <code>EVENTTM</code>, and <code>InDbDate</code>.</p>
 <p>Most likely, <code>EVENTDT</code> stands for the date when the event took place, <code>EVENTTM</code> stands for the time of day the event took place (in 24-hr format), and <code>InDbDate</code> is the date this call is recorded onto the database.</p>
 <p>If we check the data type of these columns, we will see they are stored as strings. We can convert them to <code>datetime</code> objects using <code>pandas</code>’ <code>to_datetime</code> function.</p>
-<div id="5b403add" class="cell" data-execution_count="25">
+<div id="c2ddfdcf" class="cell" data-execution_count="25">
 <div class="sourceCode cell-code" id="cb40"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a>calls[<span class="st">"EVENTDT"</span>] <span class="op">=</span> pd.to_datetime(calls[<span class="st">"EVENTDT"</span>])</span>
 <span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a>calls.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stderr">
-<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48380/874729699.py:1: UserWarning:
+<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51690/874729699.py:1: UserWarning:
 
 Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
 </code></pre>
@@ -1315,7 +1315,7 @@ <h4 data-number="5.2.3.1" class="anchored" data-anchor-id="temporality-with-pand
 </div>
 <p>Now, we can use the <code>dt</code> accessor on this column.</p>
 <p>We can get the month:</p>
-<div id="345f21b2" class="cell" data-execution_count="26">
+<div id="0802362d" class="cell" data-execution_count="26">
 <div class="sourceCode cell-code" id="cb42"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>calls[<span class="st">"EVENTDT"</span>].dt.month.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="26">
 <pre><code>0    4
@@ -1327,7 +1327,7 @@ <h4 data-number="5.2.3.1" class="anchored" data-anchor-id="temporality-with-pand
 </div>
 </div>
 <p>Which day of the week the date is on:</p>
-<div id="890c6730" class="cell" data-execution_count="27">
+<div id="4550c657" class="cell" data-execution_count="27">
 <div class="sourceCode cell-code" id="cb44"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>calls[<span class="st">"EVENTDT"</span>].dt.dayofweek.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="27">
 <pre><code>0    3
@@ -1339,7 +1339,7 @@ <h4 data-number="5.2.3.1" class="anchored" data-anchor-id="temporality-with-pand
 </div>
 </div>
 <p>Check the minimum values to see if there are any suspicious-looking, 70s dates:</p>
-<div id="1ed95706" class="cell" data-execution_count="28">
+<div id="a485ba57" class="cell" data-execution_count="28">
 <div class="sourceCode cell-code" id="cb46"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>calls.sort_values(<span class="st">"EVENTDT"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="28">
 <div>
@@ -1486,7 +1486,7 @@ <h3 data-number="5.4.1" class="anchored" data-anchor-id="csvs-and-field-names"><
 <p>We can then explore the CSV (which is a text file, and does not contain binary-encoded data) in many ways:</p><ol><li>Using a text editor like emacs, vim, VSCode, etc.</li><li>Opening the CSV directly in DataHub (read-only), Excel, Google Sheets, etc.</li><li>The <code>Python</code> file object</li><li><code>pandas</code>, using <code>pd.read_csv()</code></li></ol>
 <p>To try out options 1 and 2, you can view or download the Tuberculosis data from the <a href="https://data100.datahub.berkeley.edu/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FDS-100%2Ffa23-student&amp;urlpath=lab%2Ftree%2Ffa23-student%2Flecture%2Flec05%2Flec04-eda.ipynb&amp;branch=main">lecture demo notebook</a> under the <code>data</code> folder in the left hand menu. Notice how the CSV file is a type of <strong>rectangular data (i.e., tabular data) stored as comma-separated values</strong>.</p>
 <p>Next, let’s try out option 3 using the <code>Python</code> file object. We’ll look at the first four lines:</p>
-<div id="8e799390" class="cell" data-execution_count="29">
+<div id="a7261a5a" class="cell" data-execution_count="29">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb47"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb47-1"><a href="#cb47-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">"data/cdc_tuberculosis.csv"</span>, <span class="st">"r"</span>) <span class="im">as</span> f:</span>
@@ -1511,7 +1511,7 @@ <h3 data-number="5.4.1" class="anchored" data-anchor-id="csvs-and-field-names"><
 <p>Whoa, why are there blank lines interspersed between the lines of the CSV?</p>
 <p>You may recall that all line breaks in text files are encoded as the special newline character <code>\n</code>. Python’s <code>print()</code> prints each string (including the newline), and an additional newline on top of that.</p>
 <p>If you’re curious, we can use the <code>repr()</code> function to return the raw string with all special characters:</p>
-<div id="ea636e57" class="cell" data-execution_count="30">
+<div id="860a4dbd" class="cell" data-execution_count="30">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb49"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">"data/cdc_tuberculosis.csv"</span>, <span class="st">"r"</span>) <span class="im">as</span> f:</span>
@@ -1530,7 +1530,7 @@ <h3 data-number="5.4.1" class="anchored" data-anchor-id="csvs-and-field-names"><
 </div>
 </div>
 <p>Finally, let’s try option 4 and use the tried-and-true Data 100 approach: <code>pandas</code>.</p>
-<div id="abd1c9b0" class="cell" data-execution_count="31">
+<div id="86b4a33e" class="cell" data-execution_count="31">
 <div class="sourceCode cell-code" id="cb51"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb51-1"><a href="#cb51-1" aria-hidden="true" tabindex="-1"></a>tb_df <span class="op">=</span> pd.read_csv(<span class="st">"data/cdc_tuberculosis.csv"</span>)</span>
 <span id="cb51-2"><a href="#cb51-2" aria-hidden="true" tabindex="-1"></a>tb_df.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="31">
@@ -1610,7 +1610,7 @@ <h3 data-number="5.4.1" class="anchored" data-anchor-id="csvs-and-field-names"><
 <p>You may notice some strange things about this table: what’s up with the “Unnamed” column names and the first row?</p>
 <p>Congratulations — you’re ready to wrangle your data! Because of how things are stored, we’ll need to clean the data a bit to name our columns better.</p>
 <p>A reasonable first step is to identify the row with the right header. The <code>pd.read_csv()</code> function (<a href="https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html">documentation</a>) has the convenient <code>header</code> parameter that we can set to use the elements in row 1 as the appropriate columns:</p>
-<div id="300250f8" class="cell" data-execution_count="32">
+<div id="61722a33" class="cell" data-execution_count="32">
 <div class="sourceCode cell-code" id="cb52"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a>tb_df <span class="op">=</span> pd.read_csv(<span class="st">"data/cdc_tuberculosis.csv"</span>, header<span class="op">=</span><span class="dv">1</span>) <span class="co"># row index</span></span>
 <span id="cb52-2"><a href="#cb52-2" aria-hidden="true" tabindex="-1"></a>tb_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="32">
@@ -1689,7 +1689,7 @@ <h3 data-number="5.4.1" class="anchored" data-anchor-id="csvs-and-field-names"><
 </div>
 <p>Wait…but now we can’t differentiate between the “Number of TB cases” and “TB incidence” year columns. <code>pandas</code> has tried to make our lives easier by automatically adding “.1” to the latter columns, but this doesn’t help us, as humans, understand the data.</p>
 <p>We can do this manually with <code>df.rename()</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html?highlight=rename#pandas.DataFrame.rename">documentation</a>):</p>
-<div id="5574df1d" class="cell" data-execution_count="33">
+<div id="0f160133" class="cell" data-execution_count="33">
 <div class="sourceCode cell-code" id="cb53"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a>rename_dict <span class="op">=</span> {<span class="st">'2019'</span>: <span class="st">'TB cases 2019'</span>,</span>
 <span id="cb53-2"><a href="#cb53-2" aria-hidden="true" tabindex="-1"></a>               <span class="st">'2020'</span>: <span class="st">'TB cases 2020'</span>,</span>
 <span id="cb53-3"><a href="#cb53-3" aria-hidden="true" tabindex="-1"></a>               <span class="st">'2021'</span>: <span class="st">'TB cases 2021'</span>,</span>
@@ -1779,7 +1779,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 <p>Row 0 is what we call a <strong>rollup record</strong>, or summary record. It’s often useful when displaying tables to humans. The <strong>granularity</strong> of record 0 (Totals) vs the rest of the records (States) is different.</p>
 <p>Okay, EDA step two. How was the rollup record aggregated?</p>
 <p>Let’s check if Total TB cases is the sum of all state TB cases. If we sum over all rows, we should get <strong>2x</strong> the total cases in each of our TB cases by year (why do you think this is?).</p>
-<div id="909d2fa2" class="cell" data-execution_count="34">
+<div id="2ab36664" class="cell" data-execution_count="34">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb54"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a>tb_df.<span class="bu">sum</span>(axis<span class="op">=</span><span class="dv">0</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1796,7 +1796,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 </div>
 </div>
 <p>Whoa, what’s going on with the TB cases in 2019, 2020, and 2021? Check out the column types:</p>
-<div id="2d8bdb83" class="cell" data-execution_count="35">
+<div id="3d93a62c" class="cell" data-execution_count="35">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb56"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>tb_df.dtypes</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1814,7 +1814,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 </div>
 <p>Since there are commas in the values for TB cases, the numbers are read as the <code>object</code> datatype, or <strong>storage type</strong> (close to the <code>Python</code> string datatype), so <code>pandas</code> is concatenating strings instead of adding integers (recall that Python can “sum”, or concatenate, strings together: <code>"data" + "100"</code> evaluates to <code>"data100"</code>).</p>
 <p>Fortunately <code>read_csv</code> also has a <code>thousands</code> parameter (<a href="https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html">documentation</a>):</p>
-<div id="3b34ed82" class="cell" data-execution_count="36">
+<div id="88c22c05" class="cell" data-execution_count="36">
 <div class="sourceCode cell-code" id="cb58"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a><span class="co"># improve readability: chaining method calls with outer parentheses/line breaks</span></span>
 <span id="cb58-2"><a href="#cb58-2" aria-hidden="true" tabindex="-1"></a>tb_df <span class="op">=</span> (</span>
 <span id="cb58-3"><a href="#cb58-3" aria-hidden="true" tabindex="-1"></a>    pd.read_csv(<span class="st">"data/cdc_tuberculosis.csv"</span>, header<span class="op">=</span><span class="dv">1</span>, thousands<span class="op">=</span><span class="st">','</span>)</span>
@@ -1895,7 +1895,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 </div>
 </div>
 </div>
-<div id="e2a33794" class="cell" data-execution_count="37">
+<div id="fe98c178" class="cell" data-execution_count="37">
 <div class="sourceCode cell-code" id="cb59"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb59-1"><a href="#cb59-1" aria-hidden="true" tabindex="-1"></a>tb_df.<span class="bu">sum</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="37">
 <pre><code>U.S. jurisdiction    TotalAlabamaAlaskaArizonaArkansasCaliforniaCol...
@@ -1910,7 +1910,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 </div>
 <p>The total TB cases look right. Phew!</p>
 <p>Let’s just look at the records with <strong>state-level granularity</strong>:</p>
-<div id="520314a2" class="cell" data-execution_count="38">
+<div id="05f8a669" class="cell" data-execution_count="38">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb61"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb61-1"><a href="#cb61-1" aria-hidden="true" tabindex="-1"></a>state_tb_df <span class="op">=</span> tb_df[<span class="dv">1</span>:]</span>
@@ -1995,7 +1995,7 @@ <h3 data-number="5.4.2" class="anchored" data-anchor-id="record-granularity"><sp
 <h3 data-number="5.4.3" class="anchored" data-anchor-id="gather-census-data"><span class="header-section-number">5.4.3</span> Gather Census Data</h3>
 <p>U.S. Census population estimates <a href="https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html">source</a> (2019), <a href="https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html">source</a> (2020-2021).</p>
 <p>Running the below cells cleans the data. There are a few new methods here:</p><ul><li><code>df.convert_dtypes()</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.convert_dtypes.html">documentation</a>) conveniently converts all float dtypes into ints and is out of scope for the class.</li><li><code>df.dropna()</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html">documentation</a>) will be explained in more detail next time.</li></ul>
-<div id="b1366e01" class="cell" data-execution_count="39">
+<div id="d30f1d3a" class="cell" data-execution_count="39">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb62"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb62-1"><a href="#cb62-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 2010s census data</span></span>
@@ -2119,7 +2119,7 @@ <h3 data-number="5.4.3" class="anchored" data-anchor-id="gather-census-data"><sp
 <p>or use <code>iPython</code> magic which will intelligently import code when files change:</p>
 <div class="sourceCode" id="cb64"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb64-1"><a href="#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>load_ext autoreload</span>
 <span id="cb64-2"><a href="#cb64-2" aria-hidden="true" tabindex="-1"></a><span class="op">%</span>autoreload <span class="dv">2</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-<div id="696b693c" class="cell" data-execution_count="40">
+<div id="a105bf17" class="cell" data-execution_count="40">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb65"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb65-1"><a href="#cb65-1" aria-hidden="true" tabindex="-1"></a><span class="co"># census 2020s data</span></span>
@@ -2196,7 +2196,7 @@ <h3 data-number="5.4.3" class="anchored" data-anchor-id="gather-census-data"><sp
 <section id="joining-data-merging-dataframes" class="level3" data-number="5.4.4">
 <h3 data-number="5.4.4" class="anchored" data-anchor-id="joining-data-merging-dataframes"><span class="header-section-number">5.4.4</span> Joining Data (Merging <code>DataFrame</code>s)</h3>
 <p>Time to <code>merge</code>! Here we use the <code>DataFrame</code> method <code>df1.merge(right=df2, ...)</code> on <code>DataFrame</code> <code>df1</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html">documentation</a>). Contrast this with the function <code>pd.merge(left=df1, right=df2, ...)</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.merge.html?highlight=pandas%20merge#pandas.merge">documentation</a>). Feel free to use either.</p>
-<div id="faa0acde" class="cell" data-execution_count="41">
+<div id="6016e411" class="cell" data-execution_count="41">
 <div class="sourceCode cell-code" id="cb66"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb66-1"><a href="#cb66-1" aria-hidden="true" tabindex="-1"></a><span class="co"># merge TB DataFrame with two US census DataFrames</span></span>
 <span id="cb66-2"><a href="#cb66-2" aria-hidden="true" tabindex="-1"></a>tb_census_df <span class="op">=</span> (</span>
 <span id="cb66-3"><a href="#cb66-3" aria-hidden="true" tabindex="-1"></a>    tb_df</span>
@@ -2371,7 +2371,7 @@ <h3 data-number="5.4.4" class="anchored" data-anchor-id="joining-data-merging-da
 </div>
 </div>
 <p>Having all of these columns is a little unwieldy. We could either drop the unneeded columns now, or just merge on smaller census <code>DataFrame</code>s. Let’s do the latter.</p>
-<div id="7207fb89" class="cell" data-execution_count="42">
+<div id="4378bba5" class="cell" data-execution_count="42">
 <div class="sourceCode cell-code" id="cb67"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb67-1"><a href="#cb67-1" aria-hidden="true" tabindex="-1"></a><span class="co"># try merging again, but cleaner this time</span></span>
 <span id="cb67-2"><a href="#cb67-2" aria-hidden="true" tabindex="-1"></a>tb_census_df <span class="op">=</span> (</span>
 <span id="cb67-3"><a href="#cb67-3" aria-hidden="true" tabindex="-1"></a>    tb_df</span>
@@ -2484,7 +2484,7 @@ <h3 data-number="5.4.5" class="anchored" data-anchor-id="reproducing-data-comput
 <p><span class="math display">\[\text{TB incidence} = \frac{\text{TB cases in population}}{\text{groups in population}} = \frac{\text{TB cases in population}}{\text{population}/100000} \]</span></p>
 <p><span class="math display">\[= \frac{\text{TB cases in population}}{\text{population}} \times 100000\]</span></p>
 <p>Let’s try this for 2019:</p>
-<div id="8440c177" class="cell" data-execution_count="43">
+<div id="1f3b157b" class="cell" data-execution_count="43">
 <div class="sourceCode cell-code" id="cb68"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb68-1"><a href="#cb68-1" aria-hidden="true" tabindex="-1"></a>tb_census_df[<span class="st">"recompute incidence 2019"</span>] <span class="op">=</span> tb_census_df[<span class="st">"TB cases 2019"</span>]<span class="op">/</span>tb_census_df[<span class="st">"2019"</span>]<span class="op">*</span><span class="dv">100000</span></span>
 <span id="cb68-2"><a href="#cb68-2" aria-hidden="true" tabindex="-1"></a>tb_census_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="43">
@@ -2587,7 +2587,7 @@ <h3 data-number="5.4.5" class="anchored" data-anchor-id="reproducing-data-comput
 </div>
 <p>Awesome!!!</p>
 <p>Let’s use a for-loop and Python format strings to compute TB incidence for all years. Python f-strings are just used for the purposes of this demo, but they’re handy to know when you explore data beyond this course (<a href="https://docs.python.org/3/tutorial/inputoutput.html">documentation</a>).</p>
-<div id="0ab48560" class="cell" data-execution_count="44">
+<div id="bf377de8" class="cell" data-execution_count="44">
 <div class="sourceCode cell-code" id="cb69"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb69-1"><a href="#cb69-1" aria-hidden="true" tabindex="-1"></a><span class="co"># recompute incidence for all years</span></span>
 <span id="cb69-2"><a href="#cb69-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> year <span class="kw">in</span> [<span class="dv">2019</span>, <span class="dv">2020</span>, <span class="dv">2021</span>]:</span>
 <span id="cb69-3"><a href="#cb69-3" aria-hidden="true" tabindex="-1"></a>    tb_census_df[<span class="ss">f"recompute incidence </span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>] <span class="op">=</span> tb_census_df[<span class="ss">f"TB cases </span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>]<span class="op">/</span>tb_census_df[<span class="ss">f"</span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>]<span class="op">*</span><span class="dv">100000</span></span>
@@ -2703,7 +2703,7 @@ <h3 data-number="5.4.5" class="anchored" data-anchor-id="reproducing-data-comput
 </div>
 </div>
 <p>These numbers look pretty close!!! There are a few errors in the hundredths place, particularly in 2021. It may be useful to further explore reasons behind this discrepancy.</p>
-<div id="961ae947" class="cell" data-execution_count="45">
+<div id="38ae069c" class="cell" data-execution_count="45">
 <div class="sourceCode cell-code" id="cb70"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb70-1"><a href="#cb70-1" aria-hidden="true" tabindex="-1"></a>tb_census_df.describe()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="45">
 <div>
@@ -2864,7 +2864,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 <p>This is TB incidence computed across the entire U.S. population! How do we reproduce this?</p><ul><li>We need to reproduce the “Total” TB incidences in our rolled record.</li><li>But our current <code>tb_census_df</code> only has 51 entries (50 states plus Washington, D.C.). There is no rolled record.</li><li>What happened…?</li></ul>
 <p>Let’s get exploring!</p>
 <p>Before we keep exploring, we’ll set all indexes to more meaningful values, instead of just numbers that pertain to some row at some point. This will make our cleaning slightly easier.</p>
-<div id="9ca87646" class="cell" data-execution_count="46">
+<div id="08882cdf" class="cell" data-execution_count="46">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb71"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb71-1"><a href="#cb71-1" aria-hidden="true" tabindex="-1"></a>tb_df <span class="op">=</span> tb_df.set_index(<span class="st">"U.S. jurisdiction"</span>)</span>
@@ -2947,7 +2947,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 </div>
 </div>
-<div id="8aaab18d" class="cell" data-execution_count="47">
+<div id="40c74c50" class="cell" data-execution_count="47">
 <div class="sourceCode cell-code" id="cb72"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb72-1"><a href="#cb72-1" aria-hidden="true" tabindex="-1"></a>census_2010s_df <span class="op">=</span> census_2010s_df.set_index(<span class="st">"Geographic Area"</span>)</span>
 <span id="cb72-2"><a href="#cb72-2" aria-hidden="true" tabindex="-1"></a>census_2010s_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="47">
@@ -3055,7 +3055,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 </div>
 </div>
-<div id="15eab503" class="cell" data-execution_count="48">
+<div id="e80f8977" class="cell" data-execution_count="48">
 <div class="sourceCode cell-code" id="cb73"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb73-1"><a href="#cb73-1" aria-hidden="true" tabindex="-1"></a>census_2020s_df <span class="op">=</span> census_2020s_df.set_index(<span class="st">"Geographic Area"</span>)</span>
 <span id="cb73-2"><a href="#cb73-2" aria-hidden="true" tabindex="-1"></a>census_2020s_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="48">
@@ -3115,7 +3115,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 </div>
 <p>It turns out that our merge above only kept state records, even though our original <code>tb_df</code> had the “Total” rolled record:</p>
-<div id="cc0cf9c5" class="cell" data-execution_count="49">
+<div id="e0a1b2d9" class="cell" data-execution_count="49">
 <div class="sourceCode cell-code" id="cb74"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb74-1"><a href="#cb74-1" aria-hidden="true" tabindex="-1"></a>tb_df.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="49">
 <div>
@@ -3196,7 +3196,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 <p>Recall that <code>merge</code> does an <strong>inner</strong> merge by default, meaning that it only preserves keys that are present in <strong>both</strong> <code>DataFrame</code>s.</p>
 <p>The rolled records in our census <code>DataFrame</code> have different <code>Geographic Area</code> fields, which was the key we merged on:</p>
-<div id="29399cf9" class="cell" data-execution_count="50">
+<div id="b3680ce5" class="cell" data-execution_count="50">
 <div class="sourceCode cell-code" id="cb75"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb75-1"><a href="#cb75-1" aria-hidden="true" tabindex="-1"></a>census_2010s_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="50">
 <div>
@@ -3305,7 +3305,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 <p>The Census <code>DataFrame</code> has several rolled records. The aggregate record we are looking for actually has the Geographic Area named “United States”.</p>
 <p>One straightforward way to get the right merge is to rename the value itself. Because we now have the Geographic Area index, we’ll use <code>df.rename()</code> (<a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html">documentation</a>):</p>
-<div id="fbcc0ae8" class="cell" data-execution_count="51">
+<div id="d92c9724" class="cell" data-execution_count="51">
 <div class="sourceCode cell-code" id="cb76"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb76-1"><a href="#cb76-1" aria-hidden="true" tabindex="-1"></a><span class="co"># rename rolled record for 2010s</span></span>
 <span id="cb76-2"><a href="#cb76-2" aria-hidden="true" tabindex="-1"></a>census_2010s_df.rename(index<span class="op">=</span>{<span class="st">'United States'</span>:<span class="st">'Total'</span>}, inplace<span class="op">=</span><span class="va">True</span>)</span>
 <span id="cb76-3"><a href="#cb76-3" aria-hidden="true" tabindex="-1"></a>census_2010s_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -3414,7 +3414,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 </div>
 </div>
-<div id="876717a2" class="cell" data-execution_count="52">
+<div id="25504b04" class="cell" data-execution_count="52">
 <div class="sourceCode cell-code" id="cb77"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb77-1"><a href="#cb77-1" aria-hidden="true" tabindex="-1"></a><span class="co"># same, but for 2020s rename rolled record</span></span>
 <span id="cb77-2"><a href="#cb77-2" aria-hidden="true" tabindex="-1"></a>census_2020s_df.rename(index<span class="op">=</span>{<span class="st">'United States'</span>:<span class="st">'Total'</span>}, inplace<span class="op">=</span><span class="va">True</span>)</span>
 <span id="cb77-3"><a href="#cb77-3" aria-hidden="true" tabindex="-1"></a>census_2020s_df.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -3476,7 +3476,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 <p><br></p>
 <p>Next let’s rerun our merge. Note the different chaining, because we are now merging on indexes (<code>df.merge()</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html">documentation</a>).</p>
-<div id="c296fa76" class="cell" data-execution_count="53">
+<div id="88e02d6a" class="cell" data-execution_count="53">
 <div class="sourceCode cell-code" id="cb78"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb78-1"><a href="#cb78-1" aria-hidden="true" tabindex="-1"></a>tb_census_df <span class="op">=</span> (</span>
 <span id="cb78-2"><a href="#cb78-2" aria-hidden="true" tabindex="-1"></a>    tb_df</span>
 <span id="cb78-3"><a href="#cb78-3" aria-hidden="true" tabindex="-1"></a>    .merge(right<span class="op">=</span>census_2010s_df[[<span class="st">"2019"</span>]],</span>
@@ -3573,7 +3573,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 </div>
 <p><br></p>
 <p>Finally, let’s recompute our incidences:</p>
-<div id="0596b92a" class="cell" data-execution_count="54">
+<div id="f1f9adf9" class="cell" data-execution_count="54">
 <div class="sourceCode cell-code" id="cb79"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb79-1"><a href="#cb79-1" aria-hidden="true" tabindex="-1"></a><span class="co"># recompute incidence for all years</span></span>
 <span id="cb79-2"><a href="#cb79-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> year <span class="kw">in</span> [<span class="dv">2019</span>, <span class="dv">2020</span>, <span class="dv">2021</span>]:</span>
 <span id="cb79-3"><a href="#cb79-3" aria-hidden="true" tabindex="-1"></a>    tb_census_df[<span class="ss">f"recompute incidence </span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>] <span class="op">=</span> tb_census_df[<span class="ss">f"TB cases </span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>]<span class="op">/</span>tb_census_df[<span class="ss">f"</span><span class="sc">{</span>year<span class="sc">}</span><span class="ss">"</span>]<span class="op">*</span><span class="dv">100000</span></span>
@@ -3688,21 +3688,21 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 <p>Reported TB incidence (cases per 100,000 persons) increased <strong>9.4%</strong>, from <strong>2.2</strong> during 2020 to <strong>2.4</strong> during 2021 but was lower than incidence during 2019 (2.7). Increases occurred among both U.S.-born and non–U.S.-born persons.</p>
 </blockquote>
 <p>Recall that percent change from <span class="math inline">\(A\)</span> to <span class="math inline">\(B\)</span> is computed as <span class="math inline">\(\text{percent change} = \frac{B - A}{A} \times 100\)</span>.</p>
-<div id="9d21d7a6" class="cell" data-execution_count="55">
+<div id="63a2587a" class="cell" data-execution_count="55">
 <div class="sourceCode cell-code" id="cb80"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb80-1"><a href="#cb80-1" aria-hidden="true" tabindex="-1"></a>incidence_2020 <span class="op">=</span> tb_census_df.loc[<span class="st">'Total'</span>, <span class="st">'recompute incidence 2020'</span>]</span>
 <span id="cb80-2"><a href="#cb80-2" aria-hidden="true" tabindex="-1"></a>incidence_2020</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="55">
 <pre><code>np.float64(2.1637257652759883)</code></pre>
 </div>
 </div>
-<div id="22abd6ac" class="cell" data-execution_count="56">
+<div id="f25a9df6" class="cell" data-execution_count="56">
 <div class="sourceCode cell-code" id="cb82"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb82-1"><a href="#cb82-1" aria-hidden="true" tabindex="-1"></a>incidence_2021 <span class="op">=</span> tb_census_df.loc[<span class="st">'Total'</span>, <span class="st">'recompute incidence 2021'</span>]</span>
 <span id="cb82-2"><a href="#cb82-2" aria-hidden="true" tabindex="-1"></a>incidence_2021</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="56">
 <pre><code>np.float64(2.3672448914298068)</code></pre>
 </div>
 </div>
-<div id="900f30e9" class="cell" data-execution_count="57">
+<div id="6b35cb95" class="cell" data-execution_count="57">
 <div class="sourceCode cell-code" id="cb84"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb84-1"><a href="#cb84-1" aria-hidden="true" tabindex="-1"></a>difference <span class="op">=</span> (incidence_2021 <span class="op">-</span> incidence_2020)<span class="op">/</span>incidence_2020 <span class="op">*</span> <span class="dv">100</span></span>
 <span id="cb84-2"><a href="#cb84-2" aria-hidden="true" tabindex="-1"></a>difference</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="57">
@@ -3714,7 +3714,7 @@ <h3 data-number="5.4.6" class="anchored" data-anchor-id="bonus-eda-reproducing-t
 <section id="eda-demo-2-mauna-loa-co2-data-a-lesson-in-data-faithfulness" class="level2" data-number="5.5">
 <h2 data-number="5.5" class="anchored" data-anchor-id="eda-demo-2-mauna-loa-co2-data-a-lesson-in-data-faithfulness"><span class="header-section-number">5.5</span> EDA Demo 2: Mauna Loa CO<sub>2</sub> Data – A Lesson in Data Faithfulness</h2>
 <p><a href="https://gml.noaa.gov/ccgg/trends/data.html">Mauna Loa Observatory</a> has been monitoring CO<sub>2</sub> concentrations since 1958.</p>
-<div id="f3b223aa" class="cell" data-execution_count="58">
+<div id="cc263be2" class="cell" data-execution_count="58">
 <div class="sourceCode cell-code" id="cb86"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb86-1"><a href="#cb86-1" aria-hidden="true" tabindex="-1"></a>co2_file <span class="op">=</span> <span class="st">"data/co2_mm_mlo.txt"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>Let’s do some <strong>EDA</strong>!!</p>
@@ -3739,7 +3739,7 @@ <h3 data-number="5.5.1" class="anchored" data-anchor-id="reading-this-file-into-
 <li>The 71st and 72nd lines in the file contain column headings split over two lines.</li>
 </ul>
 <p>We can use&nbsp;<code>read_csv</code>&nbsp;to read the data into a <code>pandas</code> <code>DataFrame</code>, and we provide several arguments to specify that the separators are white space, there is no header (<strong>we will set our own column names</strong>), and to skip the first 72 rows of the file.</p>
-<div id="fa3ef26b" class="cell" data-execution_count="59">
+<div id="08c38afb" class="cell" data-execution_count="59">
 <div class="sourceCode cell-code" id="cb88"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb88-1"><a href="#cb88-1" aria-hidden="true" tabindex="-1"></a>co2 <span class="op">=</span> pd.read_csv(</span>
 <span id="cb88-2"><a href="#cb88-2" aria-hidden="true" tabindex="-1"></a>    co2_file, header <span class="op">=</span> <span class="va">None</span>, skiprows <span class="op">=</span> <span class="dv">72</span>,</span>
 <span id="cb88-3"><a href="#cb88-3" aria-hidden="true" tabindex="-1"></a>    sep <span class="op">=</span> <span class="vs">r'\s+'</span>       <span class="co">#delimiter for continuous whitespace (stay tuned for regex next lecture)</span></span>
@@ -3827,7 +3827,7 @@ <h3 data-number="5.5.1" class="anchored" data-anchor-id="reading-this-file-into-
 <h3 data-number="5.5.2" class="anchored" data-anchor-id="exploring-variable-feature-types"><span class="header-section-number">5.5.2</span> Exploring Variable Feature Types</h3>
 <p>The NOAA <a href="https://gml.noaa.gov/ccgg/trends/">webpage</a> might have some useful tidbits (in this case it doesn’t).</p>
 <p>Using this information, we’ll rerun <code>pd.read_csv</code>, but this time with some <strong>custom column names.</strong></p>
-<div id="4253fd5b" class="cell" data-execution_count="60">
+<div id="36522ccf" class="cell" data-execution_count="60">
 <div class="sourceCode cell-code" id="cb89"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb89-1"><a href="#cb89-1" aria-hidden="true" tabindex="-1"></a>co2 <span class="op">=</span> pd.read_csv(</span>
 <span id="cb89-2"><a href="#cb89-2" aria-hidden="true" tabindex="-1"></a>    co2_file, header <span class="op">=</span> <span class="va">None</span>, skiprows <span class="op">=</span> <span class="dv">72</span>,</span>
 <span id="cb89-3"><a href="#cb89-3" aria-hidden="true" tabindex="-1"></a>    sep <span class="op">=</span> <span class="st">'\s+'</span>, <span class="co">#regex for continuous whitespace (next lecture)</span></span>
@@ -3843,7 +3843,7 @@ <h3 data-number="5.5.2" class="anchored" data-anchor-id="exploring-variable-feat
 
 invalid escape sequence '\s'
 
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48380/150137587.py:3: SyntaxWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51690/150137587.py:3: SyntaxWarning:
 
 invalid escape sequence '\s'
 </code></pre>
@@ -3926,7 +3926,7 @@ <h3 data-number="5.5.2" class="anchored" data-anchor-id="exploring-variable-feat
 <section id="visualizing-co2" class="level3" data-number="5.5.3">
 <h3 data-number="5.5.3" class="anchored" data-anchor-id="visualizing-co2"><span class="header-section-number">5.5.3</span> Visualizing CO<sub>2</sub></h3>
 <p>Scientific studies tend to have very clean data, right…? Let’s jump right in and make a time series plot of CO<sub>2</sub> monthly averages.</p>
-<div id="aee6803e" class="cell" data-execution_count="61">
+<div id="218a92ee" class="cell" data-execution_count="61">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb91"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb91-1"><a href="#cb91-1" aria-hidden="true" tabindex="-1"></a>sns.lineplot(x<span class="op">=</span><span class="st">'DecDate'</span>, y<span class="op">=</span><span class="st">'Avg'</span>, data<span class="op">=</span>co2)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -3941,7 +3941,7 @@ <h3 data-number="5.5.3" class="anchored" data-anchor-id="visualizing-co2"><span
 </div>
 <p>The code above uses the <code>seaborn</code> plotting library (abbreviated <code>sns</code>). We will cover this in the Visualization lecture, but for now you don’t need to worry about how it works!</p>
 <p>Yikes! Plotting the data uncovered a problem. The sharp vertical lines suggest that we have some <strong>missing values</strong>. What happened here?</p>
-<div id="96188716" class="cell" data-execution_count="62">
+<div id="4fbc56bf" class="cell" data-execution_count="62">
 <div class="sourceCode cell-code" id="cb92"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb92-1"><a href="#cb92-1" aria-hidden="true" tabindex="-1"></a>co2.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="62">
 <div>
@@ -4017,7 +4017,7 @@ <h3 data-number="5.5.3" class="anchored" data-anchor-id="visualizing-co2"><span
 </div>
 </div>
 </div>
-<div id="e51e476b" class="cell" data-execution_count="63">
+<div id="d24f1b96" class="cell" data-execution_count="63">
 <div class="sourceCode cell-code" id="cb93"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb93-1"><a href="#cb93-1" aria-hidden="true" tabindex="-1"></a>co2.tail()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="63">
 <div>
@@ -4110,7 +4110,7 @@ <h3 data-number="5.5.4" class="anchored" data-anchor-id="sanity-checks-reasoning
 <li>Data from March 1958 to August 2019.</li>
 <li>We should have <span class="math inline">\(12 \times (2019-1957) - 2 - 4 = 738\)</span> records.</li>
 </ul>
-<div id="d97fad4b" class="cell" data-execution_count="64">
+<div id="0447ad56" class="cell" data-execution_count="64">
 <div class="sourceCode cell-code" id="cb94"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb94-1"><a href="#cb94-1" aria-hidden="true" tabindex="-1"></a>co2.shape</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="64">
 <pre><code>(738, 7)</code></pre>
@@ -4124,7 +4124,7 @@ <h3 data-number="5.5.5" class="anchored" data-anchor-id="understanding-missing-v
 <p><code>Days</code> is a time field, so let’s analyze other time fields to see if there is an explanation for missing values of days of operation.</p>
 <p>Let’s start with <strong>months</strong>, <code>Mo</code>.</p>
 <p>Are we missing any records? Each month should have 62 or 61 instances (March 1958–August 2019).</p>
-<div id="f24d97b9" class="cell" data-execution_count="65">
+<div id="a7a89a3b" class="cell" data-execution_count="65">
 <div class="sourceCode cell-code" id="cb96"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb96-1"><a href="#cb96-1" aria-hidden="true" tabindex="-1"></a>co2[<span class="st">"Mo"</span>].value_counts().sort_index()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="65">
 <pre><code>Mo
@@ -4146,7 +4146,7 @@ <h3 data-number="5.5.5" class="anchored" data-anchor-id="understanding-missing-v
 <p>As expected Jan, Feb, Sep, Oct, Nov, and Dec have 61 occurrences and the rest 62.</p>
 <p><br></p>
 <p>Next let’s explore <strong>days</strong> <code>Days</code> itself, which is the number of days that the measurement equipment worked.</p>
-<div id="c9778101" class="cell" data-execution_count="66">
+<div id="cbb4759d" class="cell" data-execution_count="66">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb98"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb98-1"><a href="#cb98-1" aria-hidden="true" tabindex="-1"></a>sns.displot(co2[<span class="st">'Days'</span>])<span class="op">;</span></span>
@@ -4164,7 +4164,7 @@ <h3 data-number="5.5.5" class="anchored" data-anchor-id="understanding-missing-v
 <p><br></p>
 <p>Finally, let’s check the last time feature, <strong>year</strong> <code>Yr</code>.</p>
 <p>Let’s check to see if there is any connection between missing-ness and the year of the recording.</p>
-<div id="3789418d" class="cell" data-execution_count="67">
+<div id="59166db7" class="cell" data-execution_count="67">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb99"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb99-1"><a href="#cb99-1" aria-hidden="true" tabindex="-1"></a>sns.scatterplot(x<span class="op">=</span><span class="st">"Yr"</span>, y<span class="op">=</span><span class="st">"Days"</span>, data<span class="op">=</span>co2)<span class="op">;</span></span>
@@ -4193,7 +4193,7 @@ <h3 data-number="5.5.5" class="anchored" data-anchor-id="understanding-missing-v
 <section id="understanding-missing-value-2-avg" class="level3" data-number="5.5.6">
 <h3 data-number="5.5.6" class="anchored" data-anchor-id="understanding-missing-value-2-avg"><span class="header-section-number">5.5.6</span> Understanding Missing Value 2: <code>Avg</code></h3>
 <p>Next, let’s return to the -99.99 values in <code>Avg</code> to analyze the overall quality of the CO<sub>2</sub> measurements. We’ll plot a histogram of the average CO<sub>2</sub> measurements.</p>
-<div id="7bd6c7ba" class="cell" data-execution_count="68">
+<div id="db131879" class="cell" data-execution_count="68">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb100"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb100-1"><a href="#cb100-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Histograms of average CO2 measurements</span></span>
@@ -4209,7 +4209,7 @@ <h3 data-number="5.5.6" class="anchored" data-anchor-id="understanding-missing-v
 </div>
 <p>The non-missing values are in the 300-400 range (a regular range of CO<sub>2</sub> levels).</p>
 <p>We also see that there are only a few missing <code>Avg</code> values (<strong>&lt;1% of values</strong>). Let’s examine all of them:</p>
-<div id="89d31338" class="cell" data-execution_count="69">
+<div id="156dfa36" class="cell" data-execution_count="69">
 <div class="sourceCode cell-code" id="cb101"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb101-1"><a href="#cb101-1" aria-hidden="true" tabindex="-1"></a>co2[co2[<span class="st">"Avg"</span>] <span class="op">&lt;</span> <span class="dv">0</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="69">
 <div>
@@ -4316,7 +4316,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 <li>Impute using some strategy</li>
 </ol>
 <p>Remember we want to fix the following plot:</p>
-<div id="cc2158ed" class="cell" data-execution_count="70">
+<div id="53738296" class="cell" data-execution_count="70">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb102"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb102-1"><a href="#cb102-1" aria-hidden="true" tabindex="-1"></a>sns.lineplot(x<span class="op">=</span><span class="st">'DecDate'</span>, y<span class="op">=</span><span class="st">'Avg'</span>, data<span class="op">=</span>co2)</span>
@@ -4334,7 +4334,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 <p>Let’s consider a few options:</p><ol><li>Drop those records</li><li>Replace -99.99 with NaN</li><li>Substitute it with a likely value for the average CO<sub>2</sub>?</li></ol>
 <p>What do you think are the pros and cons of each possible action?</p>
 <p>Let’s examine each of these three options.</p>
-<div id="1e78fab1" class="cell" data-execution_count="71">
+<div id="d4dec9b1" class="cell" data-execution_count="71">
 <div class="sourceCode cell-code" id="cb103"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb103-1"><a href="#cb103-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 1. Drop missing values</span></span>
 <span id="cb103-2"><a href="#cb103-2" aria-hidden="true" tabindex="-1"></a>co2_drop <span class="op">=</span> co2[co2[<span class="st">'Avg'</span>] <span class="op">&gt;</span> <span class="dv">0</span>]</span>
 <span id="cb103-3"><a href="#cb103-3" aria-hidden="true" tabindex="-1"></a>co2_drop.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -4412,7 +4412,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 </div>
 </div>
 </div>
-<div id="ccf61cf2" class="cell" data-execution_count="72">
+<div id="6b660a63" class="cell" data-execution_count="72">
 <div class="sourceCode cell-code" id="cb104"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb104-1"><a href="#cb104-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 2. Replace -99.99 with NaN</span></span>
 <span id="cb104-2"><a href="#cb104-2" aria-hidden="true" tabindex="-1"></a>co2_NA <span class="op">=</span> co2.replace(<span class="op">-</span><span class="fl">99.99</span>, np.nan)</span>
 <span id="cb104-3"><a href="#cb104-3" aria-hidden="true" tabindex="-1"></a>co2_NA.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -4498,7 +4498,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 </blockquote>
 <p>The <code>Int</code> feature has values that exactly match those in <code>Avg</code>, except when <code>Avg</code> is -99.99, and then a <strong>reasonable</strong> estimate is used instead.</p>
 <p>So, the third version of our data will use the <code>Int</code> feature instead of <code>Avg</code>.</p>
-<div id="702eec7b" class="cell" data-execution_count="73">
+<div id="0b7a6d19" class="cell" data-execution_count="73">
 <div class="sourceCode cell-code" id="cb105"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb105-1"><a href="#cb105-1" aria-hidden="true" tabindex="-1"></a><span class="co"># 3. Use interpolated column which estimates missing Avg values</span></span>
 <span id="cb105-2"><a href="#cb105-2" aria-hidden="true" tabindex="-1"></a>co2_impute <span class="op">=</span> co2.copy()</span>
 <span id="cb105-3"><a href="#cb105-3" aria-hidden="true" tabindex="-1"></a>co2_impute[<span class="st">'Avg'</span>] <span class="op">=</span> co2[<span class="st">'Int'</span>]</span>
@@ -4579,7 +4579,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 </div>
 <p>What’s a <strong>reasonable</strong> estimate?</p>
 <p>To answer this question, let’s zoom in on a short time period, say the measurements in 1958 (where we know we have two missing values).</p>
-<div id="ba2f23e2" class="cell" data-execution_count="74">
+<div id="a9fbb80b" class="cell" data-execution_count="74">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb106"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb106-1"><a href="#cb106-1" aria-hidden="true" tabindex="-1"></a><span class="co"># results of plotting data in 1958</span></span>
@@ -4622,7 +4622,7 @@ <h3 data-number="5.5.7" class="anchored" data-anchor-id="drop-nan-or-impute-miss
 <li>We are plotting all months in our data as a line plot</li>
 </ul>
 <p>Let’s replot our original figure with option 3:</p>
-<div id="7e39efc6" class="cell" data-execution_count="75">
+<div id="e8eb2a85" class="cell" data-execution_count="75">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb107"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb107-1"><a href="#cb107-1" aria-hidden="true" tabindex="-1"></a>sns.lineplot(x<span class="op">=</span><span class="st">'DecDate'</span>, y<span class="op">=</span><span class="st">'Avg'</span>, data<span class="op">=</span>co2_impute)</span>
@@ -4654,7 +4654,7 @@ <h3 data-number="5.5.8" class="anchored" data-anchor-id="presenting-the-data-a-d
 <ul>
 <li>You might be happier with a <strong>coarser granularity</strong> of average year data!</li>
 </ul>
-<div id="e48a2078" class="cell" data-execution_count="76">
+<div id="c4c3beb1" class="cell" data-execution_count="76">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb108"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb108-1"><a href="#cb108-1" aria-hidden="true" tabindex="-1"></a>co2_year <span class="op">=</span> co2_impute.groupby(<span class="st">'Yr'</span>).mean()</span>
diff --git a/docs/eda/eda_files/figure-pdf/cell-62-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-62-output-1.pdf
index 4b1bc989..d77fee34 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-62-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-62-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-67-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-67-output-1.pdf
index 6ab36c4e..1c5e93f6 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-67-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-67-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-68-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-68-output-1.pdf
index 0cfb1b88..fca40590 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-68-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-68-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-69-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-69-output-1.pdf
index 7ab881de..f223381e 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-69-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-69-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-71-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-71-output-1.pdf
index 6cd32a96..423bf946 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-71-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-71-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-75-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-75-output-1.pdf
index 73ec66df..f5f2f74a 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-75-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-75-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-76-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-76-output-1.pdf
index 6d0a13f3..93d2c07b 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-76-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-76-output-1.pdf differ
diff --git a/docs/eda/eda_files/figure-pdf/cell-77-output-1.pdf b/docs/eda/eda_files/figure-pdf/cell-77-output-1.pdf
index 1b779be5..4aef0a64 100644
Binary files a/docs/eda/eda_files/figure-pdf/cell-77-output-1.pdf and b/docs/eda/eda_files/figure-pdf/cell-77-output-1.pdf differ
diff --git a/docs/feature_engineering/feature_engineering.html b/docs/feature_engineering/feature_engineering.html
index ec91550a..d56168e2 100644
--- a/docs/feature_engineering/feature_engineering.html
+++ b/docs/feature_engineering/feature_engineering.html
@@ -375,7 +375,7 @@ <h3 data-number="14.1.1" class="anchored" data-anchor-id="gradient-descent-on-mu
 <p>The function we worked with above was one-dimensional – we were only minimizing the function with respect to a single parameter, <span class="math inline">\(\theta\)</span>. However, models usually have a cost function with multiple parameters that need to be optimized. For example, simple linear regression has 2 parameters: <span class="math display">\[\hat{y} = \theta_0 + \theta_1x\]</span> and multiple linear regression has <span class="math inline">\(p+1\)</span> parameters: <span class="math display">\[\mathbb{Y} = \theta_0 + \theta_1 \Bbb{X}_{:,1} + \theta_2 \Bbb{X}_{:,2} + \cdots + \theta_p \Bbb{X}_{:,p}\]</span></p>
 <p>We’ll need to expand gradient descent so we can update our guesses for all model parameters all in one go.</p>
 <p>With multiple parameters to optimize, we consider a <strong>loss surface</strong>, or the model’s loss for a particular <em>combination</em> of possible parameter values.</p>
-<div id="96b441b7" class="cell" data-execution_count="1">
+<div id="6eaff7ec" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span>
@@ -383,7 +383,7 @@ <h3 data-number="14.1.1" class="anchored" data-anchor-id="gradient-descent-on-mu
 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> sns.load_dataset(<span class="st">"tips"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="6ad6c642" class="cell" data-execution_count="2">
+<div id="30070c29" class="cell" data-execution_count="2">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.graph_objects <span class="im">as</span> go</span>
@@ -424,9 +424,9 @@ <h3 data-number="14.1.1" class="anchored" data-anchor-id="gradient-descent-on-mu
 <span id="cb2-36"><a href="#cb2-36" aria-hidden="true" tabindex="-1"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="75df6a5f-f1f5-4287-ba96-856344f36740" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("75df6a5f-f1f5-4287-ba96-856344f36740")) {                    Plotly.newPlot(                        "75df6a5f-f1f5-4287-ba96-856344f36740",                        [{"x":[[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.
0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0]],"y":[[-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1],[-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001],[-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17],[0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999],[0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998],[0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997],[0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998],[0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997],[0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993],[0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35]],"z":[[29.123031979508195,26.96047116185995,24.896675776310463,22
.931645822859743,21.065381301507795,19.297882212254603,17.629148555100183,16.059180330044526,14.587977537087637,13.21554017622951],[18.833628650614756,17.11075544680986,15.486647675103727,13.961305335496359,12.534728427987758,11.20691695257792,9.97787090926685,8.847590298054545,7.816075118941006,6.883325371926231],[10.896283606557377,9.613098016595831,8.428677858733053,7.343023132969036,6.356133839303786,5.468009977737301,4.678651548269582,3.9880585509006288,3.3962309856304405,2.903168852459017],[5.310996847336067,4.467498871217872,3.722766327198442,3.0767992152777786,2.5295975354558795,2.0811612877327472,1.7314904721083795,1.4805850885827772,1.3284451371559403,1.275070617827869],[2.0777683729508207,1.6739580106759773,1.3689130804998992,1.1626335824225869,1.05511951644404,1.0463708825642581,1.1363876807832418,1.325169911100991,1.6127175735175057,1.9990306680327863],[1.196598183401639,1.232475434970147,1.3671181186374208,1.6005262344034599,1.9326997822682639,2.3636387622318336,2.8933431742941687,3.521813018455269,4.249048294715135,5.075049003073768],[2.667486278688523,3.1430511441003834,3.717381441611009,4.390477171220399,5.162338332928555,6.032964926735477,7.002356952641165,8.070514410645615,9.237437300748834,10.503125622950817],[6.490432658811471,7.405685138066682,8.419703049420658,9.532486392873402,10.74403516842491,12.054349376075182,13.463429015824218,14.971274087672022,16.57788459161859,18.283260527663924],[12.665437323770481,14.020377416869042,15.474082942066373,17.02655389936246,18.677790288757322,20.427792110250945,22.27655936384334,24.224092049534487,26.27039016732441,28.4154537172131],[21.19250027356557,22.98712798050748,24.880521119548167,26.87267969068761,28.96360369392582,31.153293129262796,33.44174799669854,35.828968296233036,38.314954027866314,40.89970519159835]],"type":"surface"},{"marker":{"color":"red","size":10},"name":"Optimal Point","x":[1.1111111111111112],"y":[0.09999999999999998],"z":[1.0463708825642581],"type":"scatter3d"}],                 
       {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#
fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","sta
rtlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automar
gin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"scene":{"xaxis":{"title":{"text":"theta0"}},"yaxis":{"title":{"text":"theta1"}},"zaxis":{"title":{"text":"MSE"}}},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="14103023-3fd0-4e99-9db6-d7bcbb2c3570" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("14103023-3fd0-4e99-9db6-d7bcbb2c3570")) {                    Plotly.newPlot(                        "14103023-3fd0-4e99-9db6-d7bcbb2c3570",                        [{"x":[[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],[0.
0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0]],"y":[[-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1],[-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001,-0.05000000000000001],[-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17,-1.3877787807814457e-17],[0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999,0.04999999999999999],[0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998,0.09999999999999998],[0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997,0.14999999999999997],[0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998,0.19999999999999998],[0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997,0.24999999999999997],[0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993,0.29999999999999993],[0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35,0.35]],"z":[[29.123031979508195,26.96047116185995,24.896675776310463,22
.931645822859743,21.065381301507795,19.297882212254603,17.629148555100183,16.059180330044526,14.587977537087637,13.21554017622951],[18.833628650614756,17.11075544680986,15.486647675103727,13.961305335496359,12.534728427987758,11.20691695257792,9.97787090926685,8.847590298054545,7.816075118941006,6.883325371926231],[10.896283606557377,9.613098016595831,8.428677858733053,7.343023132969036,6.356133839303786,5.468009977737301,4.678651548269582,3.9880585509006288,3.3962309856304405,2.903168852459017],[5.310996847336067,4.467498871217872,3.722766327198442,3.0767992152777786,2.5295975354558795,2.0811612877327472,1.7314904721083795,1.4805850885827772,1.3284451371559403,1.275070617827869],[2.0777683729508207,1.6739580106759773,1.3689130804998992,1.1626335824225869,1.05511951644404,1.0463708825642581,1.1363876807832418,1.325169911100991,1.6127175735175057,1.9990306680327863],[1.196598183401639,1.232475434970147,1.3671181186374208,1.6005262344034599,1.9326997822682639,2.3636387622318336,2.8933431742941687,3.521813018455269,4.249048294715135,5.075049003073768],[2.667486278688523,3.1430511441003834,3.717381441611009,4.390477171220399,5.162338332928555,6.032964926735477,7.002356952641165,8.070514410645615,9.237437300748834,10.503125622950817],[6.490432658811471,7.405685138066682,8.419703049420658,9.532486392873402,10.74403516842491,12.054349376075182,13.463429015824218,14.971274087672022,16.57788459161859,18.283260527663924],[12.665437323770481,14.020377416869042,15.474082942066373,17.02655389936246,18.677790288757322,20.427792110250945,22.27655936384334,24.224092049534487,26.27039016732441,28.4154537172131],[21.19250027356557,22.98712798050748,24.880521119548167,26.87267969068761,28.96360369392582,31.153293129262796,33.44174799669854,35.828968296233036,38.314954027866314,40.89970519159835]],"type":"surface"},{"marker":{"color":"red","size":10},"name":"Optimal Point","x":[1.1111111111111112],"y":[0.09999999999999998],"z":[1.0463708825642581],"type":"scatter3d"}],                 
       {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#
fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","sta
rtlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automar
gin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"scene":{"xaxis":{"title":{"text":"theta0"}},"yaxis":{"title":{"text":"theta1"}},"zaxis":{"title":{"text":"MSE"}}},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('75df6a5f-f1f5-4287-ba96-856344f36740');
+var gd = document.getElementById('14103023-3fd0-4e99-9db6-d7bcbb2c3570');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -452,7 +452,7 @@ <h3 data-number="14.1.1" class="anchored" data-anchor-id="gradient-descent-on-mu
 </div>
 </div>
 <p>We can also visualize a bird’s-eye view of the loss surface from above using a contour plot:</p>
-<div id="99b379e7" class="cell" data-execution_count="3">
+<div id="64e7edc6" class="cell" data-execution_count="3">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>contour <span class="op">=</span> go.Contour(x<span class="op">=</span>u[<span class="dv">0</span>], y<span class="op">=</span>v[:, <span class="dv">0</span>], z<span class="op">=</span>np.reshape(MSE, u.shape))</span>
@@ -464,9 +464,9 @@ <h3 data-number="14.1.1" class="anchored" data-anchor-id="gradient-descent-on-mu
 <span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="f522e8ce-a8f6-4b3f-93de-966c2265d1e7" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("f522e8ce-a8f6-4b3f-93de-966c2265d1e7")) {                    Plotly.newPlot(                        "f522e8ce-a8f6-4b3f-93de-966c2265d1e7",                        [{"x":[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],"y":[-0.1,-0.05000000000000001,-1.3877787807814457e-17,0.04999999999999999,0.09999999999999998,0.14999999999999997,0.19999999999999998,0.24999999999999997,0.29999999999999993,0.35],"z":[[29.123031979508195,26.96047116185995,24.896675776310463,22.931645822859743,21.065381301507795,19.297882212254603,17.629148555100183,16.059180330044526,14.587977537087637,13.21554017622951],[18.833628650614756,17.11075544680986,15.486647675103727,13.961305335496359,12.534728427987758,11.20691695257792,9.97787090926685,8.847590298054545,7.816075118941006,6.883325371926231],[10.896283606557377,9.613098016595831,8.428677858733053,7.343023132969036,6.356133839303786,5.468009977737301,4.678651548269582,3.9880585509006288,3.3962309856304405,2.903168852459017],[5.310996847336067,4.467498871217872,3.722766327198442,3.0767992152777786,2.5295975354558795,2.0811612877327472,1.7314904721083795,1.4805850885827772,1.3284451371559403,1.275070617827869],[2.0777683729508207,1.6739580106759773,1.3689130804998992,1.1626335824225869,1.05511951644404,1.0463708825642581,1.1363876807832418,1.325169911100991,1.6127175735175057,1.9990306680327863],[1.196598183401639,1.232475434970147,1.3671181186374208,1.6005262344034599,1.9326997822682639,2.3636387622318336,2.8933431742941687,3.521813018455269,4.249048294715135,5.0750490
03073768],[2.667486278688523,3.1430511441003834,3.717381441611009,4.390477171220399,5.162338332928555,6.032964926735477,7.002356952641165,8.070514410645615,9.237437300748834,10.503125622950817],[6.490432658811471,7.405685138066682,8.419703049420658,9.532486392873402,10.74403516842491,12.054349376075182,13.463429015824218,14.971274087672022,16.57788459161859,18.283260527663924],[12.665437323770481,14.020377416869042,15.474082942066373,17.02655389936246,18.677790288757322,20.427792110250945,22.27655936384334,24.224092049534487,26.27039016732441,28.4154537172131],[21.19250027356557,22.98712798050748,24.880521119548167,26.87267969068761,28.96360369392582,31.153293129262796,33.44174799669854,35.828968296233036,38.314954027866314,40.89970519159835]],"type":"contour"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#f
dca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,
"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333
333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"title":{"text":"theta0"}},"yaxis":{"title":{"text":"theta1"}},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="c53988b9-24ba-4a4c-a623-eb52d242c984" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("c53988b9-24ba-4a4c-a623-eb52d242c984")) {                    Plotly.newPlot(                        "c53988b9-24ba-4a4c-a623-eb52d242c984",                        [{"x":[0.0,0.2222222222222222,0.4444444444444444,0.6666666666666666,0.8888888888888888,1.1111111111111112,1.3333333333333333,1.5555555555555554,1.7777777777777777,2.0],"y":[-0.1,-0.05000000000000001,-1.3877787807814457e-17,0.04999999999999999,0.09999999999999998,0.14999999999999997,0.19999999999999998,0.24999999999999997,0.29999999999999993,0.35],"z":[[29.123031979508195,26.96047116185995,24.896675776310463,22.931645822859743,21.065381301507795,19.297882212254603,17.629148555100183,16.059180330044526,14.587977537087637,13.21554017622951],[18.833628650614756,17.11075544680986,15.486647675103727,13.961305335496359,12.534728427987758,11.20691695257792,9.97787090926685,8.847590298054545,7.816075118941006,6.883325371926231],[10.896283606557377,9.613098016595831,8.428677858733053,7.343023132969036,6.356133839303786,5.468009977737301,4.678651548269582,3.9880585509006288,3.3962309856304405,2.903168852459017],[5.310996847336067,4.467498871217872,3.722766327198442,3.0767992152777786,2.5295975354558795,2.0811612877327472,1.7314904721083795,1.4805850885827772,1.3284451371559403,1.275070617827869],[2.0777683729508207,1.6739580106759773,1.3689130804998992,1.1626335824225869,1.05511951644404,1.0463708825642581,1.1363876807832418,1.325169911100991,1.6127175735175057,1.9990306680327863],[1.196598183401639,1.232475434970147,1.3671181186374208,1.6005262344034599,1.9326997822682639,2.3636387622318336,2.8933431742941687,3.521813018455269,4.249048294715135,5.0750490
03073768],[2.667486278688523,3.1430511441003834,3.717381441611009,4.390477171220399,5.162338332928555,6.032964926735477,7.002356952641165,8.070514410645615,9.237437300748834,10.503125622950817],[6.490432658811471,7.405685138066682,8.419703049420658,9.532486392873402,10.74403516842491,12.054349376075182,13.463429015824218,14.971274087672022,16.57788459161859,18.283260527663924],[12.665437323770481,14.020377416869042,15.474082942066373,17.02655389936246,18.677790288757322,20.427792110250945,22.27655936384334,24.224092049534487,26.27039016732441,28.4154537172131],[21.19250027356557,22.98712798050748,24.880521119548167,26.87267969068761,28.96360369392582,31.153293129262796,33.44174799669854,35.828968296233036,38.314954027866314,40.89970519159835]],"type":"contour"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#f
dca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,
"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333
333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"title":{"text":"theta0"}},"yaxis":{"title":{"text":"theta1"}},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('f522e8ce-a8f6-4b3f-93de-966c2265d1e7');
+var gd = document.getElementById('c53988b9-24ba-4a4c-a623-eb52d242c984');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -681,7 +681,7 @@ <h2 data-number="14.4" class="anchored" data-anchor-id="one-hot-encoding"><span
 <p>Feature engineering opens up a whole new set of possibilities for designing better-performing models. As you will see in lab and homework, feature engineering is one of the most important parts of the entire modeling process.</p>
 <p>A particularly powerful use of feature engineering is to allow us to perform regression on <em>non-numeric</em> features. <strong>One-hot encoding</strong> is a feature engineering technique that generates numeric features from categorical data, allowing us to use our usual methods to fit a regression model on the data.</p>
 <p>To illustrate how this works, we’ll refer back to the <code>tips</code> dataset from previous lectures. Consider the <code>"day"</code> column of the dataset:</p>
-<div id="9c1c95c0" class="cell" data-execution_count="4">
+<div id="4678063a" class="cell" data-execution_count="4">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
@@ -781,7 +781,7 @@ <h2 data-number="14.4" class="anchored" data-anchor-id="one-hot-encoding"><span
 </li>
 </ul>
 <p>The <code>OneHotEncoder</code> class of <code>sklearn</code> (<a href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html#sklearn.preprocessing.OneHotEncoder.get_feature_names_out">documentation</a>) offers a quick way to perform this one-hot encoding. You will explore its use in detail in the lab. For now, recognize that we follow a very similar workflow to when we were working with the <code>LinearRegression</code> class: we initialize a <code>OneHotEncoder</code> object, fit it to our data, and finally use <code>.transform</code> to apply the fitted encoder.</p>
-<div id="4bcaf802" class="cell" data-execution_count="5">
+<div id="7c575f89" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> sklearn.preprocessing <span class="im">import</span> OneHotEncoder</span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Initialize a OneHotEncoder object</span></span>
@@ -860,7 +860,7 @@ <h2 data-number="14.4" class="anchored" data-anchor-id="one-hot-encoding"><span
 <p><span class="math display">\[\hat{y} = \theta_{1}\phi_{1} + \theta_{2}\phi_{2} + \theta_{3}\phi_{3} + \theta_{4}\phi_{4} + \theta_{5}\phi_{5} + \theta_{6}\phi_{6}\]</span></p>
 <p>Now, the <code>day</code> feature (or rather, the four new boolean features that represent day) can be used to fit a model.</p>
 <p>Using <code>sklearn</code> to fit the new model, we can determine the model coefficients, allowing us to understand how each feature impacts the predicted tip.</p>
-<div id="2856aa04" class="cell" data-execution_count="6">
+<div id="ce1045db" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> sklearn.linear_model <span class="im">import</span> LinearRegression</span>
 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>data_w_ohe <span class="op">=</span> tips[[<span class="st">"total_bill"</span>, <span class="st">"size"</span>, <span class="st">"day"</span>]].join(encoded_day_df).drop(columns <span class="op">=</span> <span class="st">"day"</span>)</span>
 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>ohe_model <span class="op">=</span> lm.LinearRegression(fit_intercept<span class="op">=</span><span class="va">False</span>) <span class="co">#Tell sklearn to not add an additional bias column. Why?</span></span>
@@ -933,7 +933,7 @@ <h2 data-number="14.5" class="anchored" data-anchor-id="polynomial-features"><sp
 <p>We have encountered a few cases now where models with linear features have performed poorly on datasets that show clear non-linear curvature.</p>
 <p>As an example, consider the <code>vehicles</code> dataset, which contains information about cars. Suppose we want to use the <code>hp</code> (horsepower) of a car to predict its <code>"mpg"</code> (gas mileage in miles per gallon). If we visualize the relationship between these two variables, we see a non-linear curvature. Fitting a linear model to these variables results in a high (poor) value of RMSE.</p>
 <p><span class="math display">\[\hat{y} = \theta_0 + \theta_1 (\text{hp})\]</span></p>
-<div id="0ce8c48c" class="cell" data-execution_count="7">
+<div id="f85edaf1" class="cell" data-execution_count="7">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>pd.options.mode.chained_assignment <span class="op">=</span> <span class="va">None</span> </span>
@@ -967,7 +967,7 @@ <h2 data-number="14.5" class="anchored" data-anchor-id="polynomial-features"><sp
 <p>As we can see from the plot, the data follows a curved line rather than a straight one. To capture this non-linearity, we can incorporate <strong>non-linear</strong> features. Let’s introduce a <strong>polynomial</strong> term, <span class="math inline">\(\text{hp}^2\)</span>, into our regression model. The model now takes the form:</p>
 <p><span class="math display">\[\hat{y} = \theta_0 + \theta_1 (\text{hp}) + \theta_2 (\text{hp}^2)\]</span> <span class="math display">\[\hat{y} = \theta_0 + \theta_1 \phi_1 + \theta_2 \phi_2\]</span></p>
 <p>How can we fit a model with non-linear features? We can use the exact same techniques as before: ordinary least squares, gradient descent, or <code>sklearn</code>. This is because our new model is still a <strong>linear model</strong>. Although it contains non-linear <em>features</em>, it is linear with respect to the model <em>parameters</em>. All of our previous work on fitting models was done under the assumption that we were working with linear models. Because our new model is still linear, we can apply our existing methods to determine the optimal parameters.</p>
-<div id="6be12136" class="cell" data-execution_count="8">
+<div id="ef9a9cb0" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Add a hp^2 feature to the design matrix</span></span>
 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> vehicles[[<span class="st">"hp"</span>]]</span>
 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>X[<span class="st">"hp^2"</span>] <span class="op">=</span> vehicles[<span class="st">"hp"</span>]<span class="op">**</span><span class="dv">2</span></span>
diff --git a/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-8-output-2.pdf b/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-8-output-2.pdf
index 8712213a..e3df58f0 100644
Binary files a/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-8-output-2.pdf and b/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-8-output-2.pdf differ
diff --git a/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-9-output-2.pdf b/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-9-output-2.pdf
index 050c0d91..2c4c0554 100644
Binary files a/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-9-output-2.pdf and b/docs/feature_engineering/feature_engineering_files/figure-pdf/cell-9-output-2.pdf differ
diff --git a/docs/gradient_descent/gradient_descent.html b/docs/gradient_descent/gradient_descent.html
index ea245df1..39cfacc7 100644
--- a/docs/gradient_descent/gradient_descent.html
+++ b/docs/gradient_descent/gradient_descent.html
@@ -300,25 +300,34 @@
     <h2 id="toc-title">sklearn and Gradient Descent</h2>
    
   <ul>
-  <li><a href="#sklearn" id="toc-sklearn" class="nav-link active" data-scroll-target="#sklearn"><span class="header-section-number">13.1</span> <code>sklearn</code></a>
+  <li><a href="#ols-recap" id="toc-ols-recap" class="nav-link active" data-scroll-target="#ols-recap"><span class="header-section-number">13.1</span> OLS Recap</a>
   <ul>
-  <li><a href="#implementing-derived-formulas-in-code" id="toc-implementing-derived-formulas-in-code" class="nav-link" data-scroll-target="#implementing-derived-formulas-in-code"><span class="header-section-number">13.1.1</span> Implementing Derived Formulas in Code</a></li>
-  <li><a href="#the-sklearn-workflow" id="toc-the-sklearn-workflow" class="nav-link" data-scroll-target="#the-sklearn-workflow"><span class="header-section-number">13.1.2</span> The <code>sklearn</code> Workflow</a></li>
+  <li><a href="#choose-a-model" id="toc-choose-a-model" class="nav-link" data-scroll-target="#choose-a-model"><span class="header-section-number">13.1.1</span> 1. Choose a model</a></li>
+  <li><a href="#choose-a-loss-function" id="toc-choose-a-loss-function" class="nav-link" data-scroll-target="#choose-a-loss-function"><span class="header-section-number">13.1.2</span> 2. Choose a loss function</a></li>
+  <li><a href="#fit-the-model" id="toc-fit-the-model" class="nav-link" data-scroll-target="#fit-the-model"><span class="header-section-number">13.1.3</span> 3. Fit the model</a>
+  <ul>
+  <li><a href="#uniqueness-of-a-solution" id="toc-uniqueness-of-a-solution" class="nav-link" data-scroll-target="#uniqueness-of-a-solution"><span class="header-section-number">13.1.3.1</span> Uniqueness of a Solution</a></li>
+  </ul></li>
+  </ul></li>
+  <li><a href="#sklearn" id="toc-sklearn" class="nav-link" data-scroll-target="#sklearn"><span class="header-section-number">13.2</span> <code>sklearn</code></a>
+  <ul>
+  <li><a href="#implementing-derived-formulas-in-code" id="toc-implementing-derived-formulas-in-code" class="nav-link" data-scroll-target="#implementing-derived-formulas-in-code"><span class="header-section-number">13.2.1</span> Implementing Derived Formulas in Code</a></li>
+  <li><a href="#the-sklearn-workflow" id="toc-the-sklearn-workflow" class="nav-link" data-scroll-target="#the-sklearn-workflow"><span class="header-section-number">13.2.2</span> The <code>sklearn</code> Workflow</a></li>
   </ul></li>
-  <li><a href="#gradient-descent" id="toc-gradient-descent" class="nav-link" data-scroll-target="#gradient-descent"><span class="header-section-number">13.2</span> Gradient Descent</a>
+  <li><a href="#gradient-descent" id="toc-gradient-descent" class="nav-link" data-scroll-target="#gradient-descent"><span class="header-section-number">13.3</span> Gradient Descent</a>
   <ul>
-  <li><a href="#minimizing-an-arbitrary-1d-function" id="toc-minimizing-an-arbitrary-1d-function" class="nav-link" data-scroll-target="#minimizing-an-arbitrary-1d-function"><span class="header-section-number">13.2.1</span> Minimizing an Arbitrary 1D Function</a>
+  <li><a href="#minimizing-an-arbitrary-1d-function" id="toc-minimizing-an-arbitrary-1d-function" class="nav-link" data-scroll-target="#minimizing-an-arbitrary-1d-function"><span class="header-section-number">13.3.1</span> Minimizing an Arbitrary 1D Function</a>
   <ul>
-  <li><a href="#the-naive-approach-guess-and-check" id="toc-the-naive-approach-guess-and-check" class="nav-link" data-scroll-target="#the-naive-approach-guess-and-check"><span class="header-section-number">13.2.1.1</span> The Naive Approach: Guess and Check</a></li>
-  <li><a href="#scipy.optimize.minimize" id="toc-scipy.optimize.minimize" class="nav-link" data-scroll-target="#scipy.optimize.minimize"><span class="header-section-number">13.2.1.2</span> <code>Scipy.optimize.minimize</code></a></li>
-  <li><a href="#digging-into-gradient-descent" id="toc-digging-into-gradient-descent" class="nav-link" data-scroll-target="#digging-into-gradient-descent"><span class="header-section-number">13.2.1.3</span> Digging into Gradient Descent</a></li>
-  <li><a href="#algorithm-attempt-1" id="toc-algorithm-attempt-1" class="nav-link" data-scroll-target="#algorithm-attempt-1"><span class="header-section-number">13.2.1.4</span> Algorithm Attempt 1</a></li>
-  <li><a href="#algorithm-attempt-2" id="toc-algorithm-attempt-2" class="nav-link" data-scroll-target="#algorithm-attempt-2"><span class="header-section-number">13.2.1.5</span> Algorithm Attempt 2</a></li>
+  <li><a href="#the-naive-approach-guess-and-check" id="toc-the-naive-approach-guess-and-check" class="nav-link" data-scroll-target="#the-naive-approach-guess-and-check"><span class="header-section-number">13.3.1.1</span> The Naive Approach: Guess and Check</a></li>
+  <li><a href="#scipy.optimize.minimize" id="toc-scipy.optimize.minimize" class="nav-link" data-scroll-target="#scipy.optimize.minimize"><span class="header-section-number">13.3.1.2</span> <code>Scipy.optimize.minimize</code></a></li>
+  <li><a href="#digging-into-gradient-descent" id="toc-digging-into-gradient-descent" class="nav-link" data-scroll-target="#digging-into-gradient-descent"><span class="header-section-number">13.3.1.3</span> Digging into Gradient Descent</a></li>
+  <li><a href="#algorithm-attempt-1" id="toc-algorithm-attempt-1" class="nav-link" data-scroll-target="#algorithm-attempt-1"><span class="header-section-number">13.3.1.4</span> Algorithm Attempt 1</a></li>
+  <li><a href="#algorithm-attempt-2" id="toc-algorithm-attempt-2" class="nav-link" data-scroll-target="#algorithm-attempt-2"><span class="header-section-number">13.3.1.5</span> Algorithm Attempt 2</a></li>
   </ul></li>
-  <li><a href="#convexity" id="toc-convexity" class="nav-link" data-scroll-target="#convexity"><span class="header-section-number">13.2.2</span> Convexity</a></li>
-  <li><a href="#gradient-descent-in-1-dimension" id="toc-gradient-descent-in-1-dimension" class="nav-link" data-scroll-target="#gradient-descent-in-1-dimension"><span class="header-section-number">13.2.3</span> Gradient Descent in 1 Dimension</a>
+  <li><a href="#convexity" id="toc-convexity" class="nav-link" data-scroll-target="#convexity"><span class="header-section-number">13.3.2</span> Convexity</a></li>
+  <li><a href="#gradient-descent-in-1-dimension" id="toc-gradient-descent-in-1-dimension" class="nav-link" data-scroll-target="#gradient-descent-in-1-dimension"><span class="header-section-number">13.3.3</span> Gradient Descent in 1 Dimension</a>
   <ul>
-  <li><a href="#gradient-descent-on-the-tips-dataset" id="toc-gradient-descent-on-the-tips-dataset" class="nav-link" data-scroll-target="#gradient-descent-on-the-tips-dataset"><span class="header-section-number">13.2.3.1</span> Gradient Descent on the <code>tips</code> Dataset</a></li>
+  <li><a href="#gradient-descent-on-the-tips-dataset" id="toc-gradient-descent-on-the-tips-dataset" class="nav-link" data-scroll-target="#gradient-descent-on-the-tips-dataset"><span class="header-section-number">13.3.3.1</span> Gradient Descent on the <code>tips</code> Dataset</a></li>
   </ul></li>
   </ul></li>
   </ul>
@@ -367,7 +376,7 @@ <h2 id="toc-title">sklearn and Gradient Descent</h2>
 </div>
 </div>
 </div>
-<div id="2d2af05e" class="cell" data-execution_count="1">
+<div id="011cac7c" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -379,12 +388,146 @@ <h2 id="toc-title">sklearn and Gradient Descent</h2>
 <span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>pd.options.mode.chained_assignment <span class="op">=</span> <span class="va">None</span>  <span class="co"># default='warn'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<section id="sklearn" class="level2" data-number="13.1">
-<h2 data-number="13.1" class="anchored" data-anchor-id="sklearn"><span class="header-section-number">13.1</span> <code>sklearn</code></h2>
-<section id="implementing-derived-formulas-in-code" class="level3" data-number="13.1.1">
-<h3 data-number="13.1.1" class="anchored" data-anchor-id="implementing-derived-formulas-in-code"><span class="header-section-number">13.1.1</span> Implementing Derived Formulas in Code</h3>
+<section id="ols-recap" class="level2" data-number="13.1">
+<h2 data-number="13.1" class="anchored" data-anchor-id="ols-recap"><span class="header-section-number">13.1</span> OLS Recap</h2>
+<section id="choose-a-model" class="level3" data-number="13.1.1">
+<h3 data-number="13.1.1" class="anchored" data-anchor-id="choose-a-model"><span class="header-section-number">13.1.1</span> 1. Choose a model</h3>
+<p>Recall that when using multiple linear regression, we can generate a prediction for each of our <span class="math inline">\(n\)</span> data points:</p>
+<p><span class="math display">\[\hat{y} =\theta_{0} + \theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}\]</span></p>
+<div data-align="middle">
+<table style="width:100%">
+<tbody><tr align="center">
+<td>
+<img src="images/ols_matrices_old.png" alt="ols_matrices_old" width="600">
+</td>
+</tr>
+</tbody></table>
+</div>
+<p>In the previous lecture, our design matrix had p+1 columns: p features plus a column of 1s to account for the intercept, <span class="math inline">\(\theta_0\)</span>. This makes the notation messy.<br>
+Let’s redefine <strong>p as the number of columns in our covariate matrix</strong> and <strong>add a column of 1s</strong> to encode the intercept (if desired). If we choose to add a column of 1s, then <span class="math inline">\(x_1\)</span> can be a 1 for every data point.</p>
+<p><span class="math display">\[\hat{y} =\theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}\]</span></p>
+<div data-align="middle">
+<table style="width:100%">
+<tbody><tr align="center">
+<td>
+<img src="images/ols_matrices_new.png" alt="ols_matrices_new" width="600">
+</td>
+</tr>
+</tbody></table>
+</div>
+</section>
+<section id="choose-a-loss-function" class="level3" data-number="13.1.2">
+<h3 data-number="13.1.2" class="anchored" data-anchor-id="choose-a-loss-function"><span class="header-section-number">13.1.2</span> 2. Choose a loss function</h3>
+<p>Recall that we then choose the mean squared error loss function shown below where the prediction vector <span class="math inline">\(\hat{\mathbb{Y}}\)</span> depends on <span class="math inline">\(\theta\)</span>. <span class="math display">\[R(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2 = \frac{1}{n} (||\mathbb{Y} - \hat{\mathbb{Y}}||_2)^2\]</span></p>
+</section>
+<section id="fit-the-model" class="level3" data-number="13.1.3">
+<h3 data-number="13.1.3" class="anchored" data-anchor-id="fit-the-model"><span class="header-section-number">13.1.3</span> 3. Fit the model</h3>
+<p>We can then minimize the average loss with calculus or geometry. See the previous lecture for a derivation of the normal equation (<span class="math inline">\(\mathbb{X}^T \mathbb{X} \hat{\theta} = \mathbb{X}^T \mathbb{Y}\)</span>) using geometry. We can see what the matrices look like with our new interpretation where <span class="math inline">\(\mathbb{X}\)</span> is now an <span class="math inline">\(n\)</span> by <span class="math inline">\(p\)</span> matrix instead of an <span class="math inline">\(n\)</span> by <span class="math inline">\(p+1\)</span> matrix.</p>
+<div data-align="middle">
+<table style="width:100%">
+<tbody><tr align="center">
+<td>
+<img src="images/ols_solution_matrices.png" alt="ols_solution_matrices" width="400">
+</td>
+</tr>
+</tbody></table>
+</div>
+<p>To summarize:</p>
+<table class="table">
+<colgroup>
+<col style="width: 25%">
+<col style="width: 25%">
+<col style="width: 25%">
+<col style="width: 25%">
+</colgroup>
+<thead>
+<tr class="header">
+<th></th>
+<th>Model</th>
+<th>Estimate</th>
+<th>Unique?</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>Constant Model + MSE</td>
+<td><span class="math inline">\(\hat{y} = \theta_0\)</span></td>
+<td><span class="math inline">\(\hat{\theta}_0 = mean(y) = \bar{y}\)</span></td>
+<td><strong>Yes</strong>. Any set of values has a unique mean.</td>
+</tr>
+<tr class="even">
+<td>Constant Model + MAE</td>
+<td><span class="math inline">\(\hat{y} = \theta_0\)</span></td>
+<td><span class="math inline">\(\hat{\theta}_0 = median(y)\)</span></td>
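+<td><strong>Yes</strong>, if <span class="math inline">\(n\)</span> is odd. <strong>No</strong>, if <span class="math inline">\(n\)</span> is even: any value between the middle 2 values minimizes the MAE, so by convention we return their average.</td>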
+</tr>
+<tr class="odd">
+<td>Simple Linear Regression + MSE</td>
+<td><span class="math inline">\(\hat{y} = \theta_0 + \theta_1x\)</span></td>
+<td><span class="math inline">\(\hat{\theta}_0 = \bar{y} - \hat{\theta}_1\bar{x}\)</span> <span class="math inline">\(\hat{\theta}_1 = r\frac{\sigma_y}{\sigma_x}\)</span></td>
+<td><strong>Yes</strong>. Any set of non-constant* values has a unique mean, SD, and correlation coefficient.</td>
+</tr>
+<tr class="even">
+<td><strong>OLS</strong> (Linear Model + MSE)</td>
+<td><span class="math inline">\(\mathbb{\hat{Y}} = \mathbb{X}\mathbb{\theta}\)</span></td>
+<td><span class="math inline">\(\hat{\theta} = (\mathbb{X}^T\mathbb{X})^{-1}\mathbb{X}^T\mathbb{Y}\)</span></td>
+<td><strong>Yes</strong>, if <span class="math inline">\(\mathbb{X}\)</span> is full column rank (all columns are linearly independent, which requires at least as many datapoints as features).</td>
+</tr>
+</tbody>
+</table>
+<section id="uniqueness-of-a-solution" class="level4" data-number="13.1.3.1">
+<h4 data-number="13.1.3.1" class="anchored" data-anchor-id="uniqueness-of-a-solution"><span class="header-section-number">13.1.3.1</span> Uniqueness of a Solution</h4>
+<p>In most settings, the number of observations (<span class="math inline">\(n\)</span>) is much greater than the number of features (<span class="math inline">\(p\)</span>). Note that at least one solution always exists because intuitively, we can always draw a line of best fit for a given set of data, but there may be multiple lines that are “equally good”. (Formal proof is beyond this course.) Let’s now revisit the interpretation for uniqueness of a solution at the end of the last lecture, but with the new notation of <span class="math inline">\(p\)</span> instead of <span class="math inline">\(p+1\)</span> features.</p>
+<p>The Least Squares estimate <span class="math inline">\(\hat{\theta}\)</span> is <strong>unique</strong> if and only if <span class="math inline">\(\mathbb{X}\)</span> is <strong>full column rank</strong>.</p>
+<div class="callout callout-style-simple callout-none no-icon">
+<div class="callout-body d-flex">
+<div class="callout-icon-container">
+<i class="callout-icon no-icon"></i>
+</div>
+<div class="callout-body-container">
+<p>Proof:</p>
+<ul>
+<li>We know the solution to the normal equation <span class="math inline">\(\mathbb{X}^T\mathbb{X}\hat{\theta} = \mathbb{X}^T\mathbb{Y}\)</span> is the least squares estimate that minimizes the squared loss.</li>
+<li>The normal equation has a <strong>unique</strong> solution <span class="math inline">\(\hat{\theta}\)</span> <span class="math inline">\(\iff\)</span> the square matrix <span class="math inline">\(\mathbb{X}^T\mathbb{X}\)</span> is <strong>invertible</strong> <span class="math inline">\(\iff\)</span> <span class="math inline">\(\mathbb{X}^T\mathbb{X}\)</span> is full rank.
+<ul>
+<li>The <strong>column rank</strong> of a matrix is the maximum number of linearly independent columns it contains.</li>
+<li>An <span class="math inline">\(n\)</span> x <span class="math inline">\(n\)</span> square matrix is deemed full column rank when all of its columns are linearly independent. That is, its rank would be equal to <span class="math inline">\(n\)</span>.</li>
+<li><span class="math inline">\(\mathbb{X}^T\mathbb{X}\)</span> has shape <span class="math inline">\(p \times p\)</span>, and therefore has max rank <span class="math inline">\(p\)</span>.</li>
+</ul></li>
+<li><span class="math inline">\(rank(\mathbb{X}^T\mathbb{X})\)</span> = <span class="math inline">\(rank(\mathbb{X})\)</span> (proof out of scope).</li>
+<li>Therefore, <span class="math inline">\(\mathbb{X}^T\mathbb{X}\)</span> has rank <span class="math inline">\(p\)</span> <span class="math inline">\(\iff\)</span> <span class="math inline">\(\mathbb{X}\)</span> has rank <span class="math inline">\(p\)</span> <span class="math inline">\(\iff \mathbb{X}\)</span> is full column rank.</li>
+</ul>
+</div>
+</div>
+</div>
+<p>Therefore, if <span class="math inline">\(\mathbb{X}\)</span> is not full column rank, we will not have unique estimates. This can happen for two major reasons.</p>
+<ol type="1">
+<li>If our design matrix <span class="math inline">\(\mathbb{X}\)</span> is “<strong>wide</strong>”:
+<ul>
+<li>If n &lt; p, then we have more features (columns) than observations (rows).</li>
+<li>Then <span class="math inline">\(rank(\mathbb{X})\)</span> ≤ min(n, p) = n &lt; p, so <span class="math inline">\(\mathbb{X}\)</span> cannot be full column rank and <span class="math inline">\(\hat{\theta}\)</span> is not unique.</li>
+<li>Typically we have n &gt;&gt; p so this is less of an issue.</li>
+</ul></li>
+<li>If our design matrix <span class="math inline">\(\mathbb{X}\)</span> has features that are <strong>linear combinations</strong> of other features:
+<ul>
+<li>By definition, the rank of <span class="math inline">\(\mathbb{X}\)</span> is the maximum number of linearly independent columns in <span class="math inline">\(\mathbb{X}\)</span>.</li>
+<li>Example: If “Width”, “Height”, and “Perimeter” are all columns,
+<ul>
+<li>Perimeter = 2 * Width + 2 * Height <span class="math inline">\(\rightarrow\)</span> <span class="math inline">\(\mathbb{X}\)</span> is not full rank.</li>
+</ul></li>
+<li>Important with one-hot encoding (to discuss later).</li>
+</ul></li>
+</ol>
+<p>Let’s now explore how to use the normal equations with a real-world dataset in the next section.</p>
+</section>
+</section>
+</section>
+<section id="sklearn" class="level2" data-number="13.2">
+<h2 data-number="13.2" class="anchored" data-anchor-id="sklearn"><span class="header-section-number">13.2</span> <code>sklearn</code></h2>
+<section id="implementing-derived-formulas-in-code" class="level3" data-number="13.2.1">
+<h3 data-number="13.2.1" class="anchored" data-anchor-id="implementing-derived-formulas-in-code"><span class="header-section-number">13.2.1</span> Implementing Derived Formulas in Code</h3>
 <p>Throughout this lecture, we’ll refer to the <code>penguins</code> dataset.</p>
-<div id="52b79134" class="cell" data-execution_count="2">
+<div id="48795f4a" class="cell" data-execution_count="2">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -470,7 +613,7 @@ <h3 data-number="13.1.1" class="anchored" data-anchor-id="implementing-derived-f
 </div>
 </div>
 <p>Our goal will be to predict the value of the <code>"bill_depth_mm"</code> for a particular penguin given its <code>"flipper_length_mm"</code> and <code>"body_mass_g"</code>. We’ll also add a bias column of all ones to represent the intercept term of our models.</p>
-<div id="3e850657" class="cell" data-execution_count="3">
+<div id="f2f948d8" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Add a bias column of all ones to `penguins`</span></span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>penguins[<span class="st">"bias"</span>] <span class="op">=</span> np.ones(<span class="bu">len</span>(penguins), dtype<span class="op">=</span><span class="bu">int</span>) </span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -494,7 +637,7 @@ <h3 data-number="13.1.1" class="anchored" data-anchor-id="implementing-derived-f
 <li>To compute an inverse, use <code>NumPy</code>’s in-built method <code>np.linalg.inv</code></li>
 </ul>
 <p>Putting this all together, we can compute the OLS estimate for the optimal model parameters, stored in the array <code>theta_hat</code>.</p>
-<div id="07eb7b2b" class="cell" data-execution_count="4">
+<div id="14ae22ed" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>theta_hat <span class="op">=</span> np.linalg.inv(X.T <span class="op">@</span> X) <span class="op">@</span> X.T <span class="op">@</span> Y</span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>theta_hat</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="4">
@@ -505,7 +648,7 @@ <h3 data-number="13.1.1" class="anchored" data-anchor-id="implementing-derived-f
 </div>
 <p>To make predictions using our optimized parameter values, we matrix-multiply the design matrix with the parameter vector:</p>
 <p><span class="math display">\[\hat{\mathbb{Y}} = \mathbb{X}\theta\]</span></p>
-<div id="b902dbee" class="cell" data-execution_count="5">
+<div id="fda7141e" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>Y_hat <span class="op">=</span> X <span class="op">@</span> theta_hat</span>
 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>pd.DataFrame(Y_hat).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="5">
@@ -547,8 +690,8 @@ <h3 data-number="13.1.1" class="anchored" data-anchor-id="implementing-derived-f
 </div>
 </div>
 </section>
-<section id="the-sklearn-workflow" class="level3" data-number="13.1.2">
-<h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow"><span class="header-section-number">13.1.2</span> The <code>sklearn</code> Workflow</h3>
+<section id="the-sklearn-workflow" class="level3" data-number="13.2.2">
+<h3 data-number="13.2.2" class="anchored" data-anchor-id="the-sklearn-workflow"><span class="header-section-number">13.2.2</span> The <code>sklearn</code> Workflow</h3>
 <p>We’ve already saved a lot of time (and avoided tedious calculations) by translating our derived formulas into code. However, we still had to go through the process of writing out the linear algebra ourselves.</p>
 <p>To make life <em>even easier</em>, we can turn to the <code>sklearn</code> <a href="https://scikit-learn.org/stable/"><code>python</code> library</a>. <code>sklearn</code> is a robust library of machine learning tools used extensively in research and industry. It is the standard for simple machine learning tasks and gives us a wide variety of in-built modeling frameworks and methods, so we’ll keep returning to <code>sklearn</code> techniques as we progress through Data 100.</p>
 <p>Regardless of the specific type of model being implemented, <code>sklearn</code> follows a standard set of steps for creating a model:</p>
@@ -571,14 +714,14 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 <p><code>sklearn</code> stores “templates” of useful models for machine learning. We begin the modeling process by making a “copy” of one of these templates for our own use. Model initialization looks like <code>ModelClass()</code>, where <code>ModelClass</code> is the type of model we wish to create.</p>
 <p>For now, let’s create a linear regression model using <code>LinearRegression</code>.</p>
 <p><code>my_model</code> is now an instance of the <code>LinearRegression</code> class. You can think of it as the “idea” of a linear regression model. We haven’t trained it yet, so it doesn’t know any model parameters and cannot be used to make predictions. In fact, we haven’t even told it what data to use for modeling! It simply waits for further instructions.</p>
-<div id="9945d2d9" class="cell" data-execution_count="6">
+<div id="39eb5fb2" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>my_model <span class="op">=</span> LinearRegression()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p><strong>2. Train the model using <code>.fit</code></strong></p>
 <p>Before the model can make predictions, we will need to fit it to our training data. When we fit the model, <code>sklearn</code> will run gradient descent behind the scenes to determine the optimal model parameters. It will then save these model parameters to our model instance for future use.</p>
 <p>All <code>sklearn</code> model classes include a <code>.fit</code> method, which is used to fit the model. It takes in two inputs: the design matrix, <code>X</code>, and the target variable, <code>Y</code>.</p>
 <p>Let’s start by fitting a model with just one feature: the flipper length. We create a design matrix <code>X</code> by pulling out the <code>"flipper_length_mm"</code> column from the <code>DataFrame</code>.</p>
-<div id="44fea632" class="cell" data-execution_count="7">
+<div id="db9c4a28" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="co"># .fit expects a 2D data design matrix, so we use double brackets to extract a DataFrame</span></span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> penguins[[<span class="st">"flipper_length_mm"</span>]]</span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[<span class="st">"bill_depth_mm"</span>]</span>
@@ -998,14 +1141,14 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 <p>Note that <code>LinearRegression</code> will automatically include an intercept term.</p>
 <p>The fitted model parameters are stored as attributes of the model instance. <code>my_model.intercept_</code> will return the value of <span class="math inline">\(\hat{\theta}_0\)</span> as a scalar. <code>my_model.coef_</code> will return all values <span class="math inline">\(\hat{\theta}_1,
 \hat{\theta}_1, ...\)</span> in an array. Because our model only contains one feature, we see just the value of <span class="math inline">\(\hat{\theta}_1\)</span> in the cell below.</p>
-<div id="26b9247a" class="cell" data-execution_count="8">
+<div id="85b4a3a3" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The intercept term, theta_0</span></span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>my_model.intercept_</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="8">
 <pre><code>np.float64(7.297305899612313)</code></pre>
 </div>
 </div>
-<div id="c9aa5e54" class="cell" data-execution_count="9">
+<div id="45cedc19" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># All parameters theta_1, ..., theta_p</span></span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>my_model.coef_</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
@@ -1015,7 +1158,7 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 <p><strong>3. Use the fitted model to make predictions</strong></p>
 <p>Now that the model has been trained, we can use it to make predictions! To do so, we use the <code>.predict</code> method. <code>.predict</code> takes in one argument: the design matrix that should be used to generate predictions. To understand how the model performs on the training set, we would pass in the training data. Alternatively, to make predictions on unseen data, we would pass in a new dataset that wasn’t used to train the model.</p>
 <p>Below, we call <code>.predict</code> to generate model predictions on the original training data. As before, we use double brackets to ensure that we extract 2-dimensional data.</p>
-<div id="4524d99f" class="cell" data-execution_count="10">
+<div id="6b5e97fa" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>Y_hat_one_feature <span class="op">=</span> my_model.predict(penguins[[<span class="st">"flipper_length_mm"</span>]])</span>
 <span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"The RMSE of the model is </span><span class="sc">{</span>np<span class="sc">.</span>sqrt(np.mean((Y<span class="op">-</span>Y_hat_one_feature)<span class="op">**</span><span class="dv">2</span>))<span class="sc">}</span><span class="ss">"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1026,7 +1169,7 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 <p>What if we wanted a model with two features?</p>
 <p><span class="math display">\[\text{bill depth} = \theta_0 + \theta_1 \text{flipper length} + \theta_2 \text{body mass}\]</span></p>
 <p>We repeat this three-step process by intializing a new model object, then calling <code>.fit</code> and <code>.predict</code> as before.</p>
-<div id="dfdd7214" class="cell" data-execution_count="11">
+<div id="0c58d39b" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 1: initialize LinearRegression model</span></span>
 <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>two_feature_model <span class="op">=</span> LinearRegression()</span>
 <span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1045,7 +1188,7 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 </div>
 </div>
 <p>We can also see that we obtain the same predictions using <code>sklearn</code> as we did when applying the ordinary least squares formula before!</p>
-<div id="77b941c4" class="cell" data-execution_count="12">
+<div id="f937efc2" class="cell" data-execution_count="12">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>pd.DataFrame({<span class="st">"Y_hat from OLS"</span>:np.squeeze(Y_hat), <span class="st">"Y_hat from sklearn"</span>:Y_hat_two_features}).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1096,8 +1239,8 @@ <h3 data-number="13.1.2" class="anchored" data-anchor-id="the-sklearn-workflow">
 </div>
 </section>
 </section>
-<section id="gradient-descent" class="level2" data-number="13.2">
-<h2 data-number="13.2" class="anchored" data-anchor-id="gradient-descent"><span class="header-section-number">13.2</span> Gradient Descent</h2>
+<section id="gradient-descent" class="level2" data-number="13.3">
+<h2 data-number="13.3" class="anchored" data-anchor-id="gradient-descent"><span class="header-section-number">13.3</span> Gradient Descent</h2>
 <p>At this point, we’ve grown quite familiar with the process of choosing a model and a corresponding loss function and optimizing parameters by choosing the values of <span class="math inline">\(\theta\)</span> that minimize the loss function. So far, we’ve optimized <span class="math inline">\(\theta\)</span> by</p>
 <ol type="1">
 <li>Using calculus to take the derivative of the loss function with respect to <span class="math inline">\(\theta\)</span>, setting it equal to 0, and solving for <span class="math inline">\(\theta\)</span>.</li>
@@ -1107,25 +1250,25 @@ <h2 data-number="13.2" class="anchored" data-anchor-id="gradient-descent"><span
 <blockquote class="blockquote">
 <p><strong>BIG IDEA</strong>: use an iterative algorithm to numerically compute the minimum of the loss.</p>
 </blockquote>
-<section id="minimizing-an-arbitrary-1d-function" class="level3" data-number="13.2.1">
-<h3 data-number="13.2.1" class="anchored" data-anchor-id="minimizing-an-arbitrary-1d-function"><span class="header-section-number">13.2.1</span> Minimizing an Arbitrary 1D Function</h3>
+<section id="minimizing-an-arbitrary-1d-function" class="level3" data-number="13.3.1">
+<h3 data-number="13.3.1" class="anchored" data-anchor-id="minimizing-an-arbitrary-1d-function"><span class="header-section-number">13.3.1</span> Minimizing an Arbitrary 1D Function</h3>
 <p>Let’s consider an arbitrary function. Our goal is to find the value of <span class="math inline">\(x\)</span> that minimizes this function.</p>
-<div id="c4f11a21" class="cell" data-execution_count="13">
+<div id="4ce8de69" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> arbitrary(x):</span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (x<span class="op">**</span><span class="dv">4</span> <span class="op">-</span> <span class="dv">15</span><span class="op">*</span>x<span class="op">**</span><span class="dv">3</span> <span class="op">+</span> <span class="dv">80</span><span class="op">*</span>x<span class="op">**</span><span class="dv">2</span> <span class="op">-</span> <span class="dv">180</span><span class="op">*</span>x <span class="op">+</span> <span class="dv">144</span>)<span class="op">/</span><span class="dv">10</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p><img src="images/arbitrary.png" alt="arbitrary" width="600"></p>
-<section id="the-naive-approach-guess-and-check" class="level4" data-number="13.2.1.1">
-<h4 data-number="13.2.1.1" class="anchored" data-anchor-id="the-naive-approach-guess-and-check"><span class="header-section-number">13.2.1.1</span> The Naive Approach: Guess and Check</h4>
+<section id="the-naive-approach-guess-and-check" class="level4" data-number="13.3.1.1">
+<h4 data-number="13.3.1.1" class="anchored" data-anchor-id="the-naive-approach-guess-and-check"><span class="header-section-number">13.3.1.1</span> The Naive Approach: Guess and Check</h4>
 <p>Above, we saw that the minimum is somewhere around 5.3. Let’s see if we can figure out how to find the exact minimum algorithmically from scratch. One very slow (and terrible) way would be manual guess-and-check.</p>
-<div id="9b453549" class="cell" data-execution_count="14">
+<div id="4685a003" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>arbitrary(<span class="dv">6</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="14">
 <pre><code>0.0</code></pre>
 </div>
 </div>
 <p>A somewhat better (but still slow) approach is to use brute force to try out a bunch of x values and return the one that yields the lowest loss.</p>
-<div id="4c68c29a" class="cell" data-execution_count="15">
+<div id="ee67c4ee" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simple_minimize(f, xs):</span>
 <span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Takes in a function f and a set of values xs. </span></span>
 <span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Calculates the value of the function f at all values x in xs</span></span>
@@ -1140,7 +1283,7 @@ <h4 data-number="13.2.1.1" class="anchored" data-anchor-id="the-naive-approach-g
 </div>
 </div>
 <p>This process is essentially the same as before, where we made a graphical plot; it’s just that we’re only looking at 20 selected points.</p>
-<div id="64182ec7" class="cell" data-execution_count="16">
+<div id="5a9482e0" class="cell" data-execution_count="16">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb28"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>xs <span class="op">=</span> np.linspace(<span class="dv">1</span>, <span class="dv">7</span>, <span class="dv">200</span>)</span>
@@ -1156,9 +1299,9 @@ <h4 data-number="13.2.1.1" class="anchored" data-anchor-id="the-naive-approach-g
 <span id="cb28-11"><a href="#cb28-11" aria-hidden="true" tabindex="-1"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="4cc07f0b-a295-45b8-afc7-ed8f63ada2ff" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("4cc07f0b-a295-45b8-afc7-ed8f63ada2ff")) {                    Plotly.newPlot(                        "4cc07f0b-a295-45b8-afc7-ed8f63ada2ff",                        [{"hovertemplate":"x=%{x}<br>y=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.0502512
56281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.321608040201005,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.
366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.638190954773869,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"xaxis":"x","y":[3.0,2.8197775132646994,2.6468296407545298,2.480978457571409,2.3220480221881674,2.169864376448527,2.0242555455671196,1.8850515381294826,1.7520843460920474,1.6251879447821538,1.5041982928980473,1.3889533325088705,1.2792929890546703,1.175059171346399,1.076095771565909,0.9822486652659563,0.8933657113701969,0.809296752173205,0.7298936133404282,0.6550101039082478,0.5845020162839318,0.5182271262456482,0.45604519294247436,0.39781795889439875,0.34340914999228855,0.2926844754979413,0.24551162804404497,0.20176028363418083,0.1613021016428462,0.1240107248154402,0.08976177926825812,0.05843287448851129,0.029903603334304307,0.0040555420346265695,-0.019227749810596606,-0.04006072923056081,-0.0585558698835257,-0.07482366205689459,-0.08897261266714337,-0.10110924525984047,-0.11133810000965809,-0.119761733720361,-0.12648071982483203,-0.1315936483850237,-0.13519712609199247,-0.13738577626590426,-0.1382522388560119,-0.13788717044065493,-0.13637924422729383,-0.13381515005247593,-0.13027959438184666,-0.1258553003101497,-0.12062300756120407,-0.11466147248795551,-0.10804746807244214,-0.10085578392577758,-0.09315922628821909,-0.08502861802905386,-0.0765327986467014,-0.06773862426869641,-0.05871096765167181,-0.04951271818131318,-0.04020478187242702,-0.030846081368940757,-0.021493555943828825,-0.012202161499220664,-0.0030248705662870635,0.005987327694657552,0.014785427494228998,0.023322406413939234,0.03155322540619636,0.039434828794344415,0.046926144272549666,0.053988082905993905,0.06058353913065275,0.06667739075349459,0.0722364989522987,0.0772297082758655,0.08162784664377228,0.0854037253466231,0.0885321
390457932,0.09098986577366759,0.09275566693348196,0.0938102872993909,0.09413645501645647,0.09371888160064827,0.09254426193878089,0.09060127428864462,0.08788058027890884,0.08437482490911634,0.08007863654980837,0.07498862694224044,0.06910339119879154,0.062423507802634504,0.05495153860777009,0.04669202883931121,0.037651507093005424,0.027838485335746555,0.01726345890522225,0.005938906509970821,-0.006120709770470967,-0.018898944485715673,-0.03237736881442288,-0.04653557056435602,-0.061351154172564294,-0.07679974070485969,-0.0928549678563968,-0.10948848995135449,-0.12666997794297002,-0.14436711941368685,-0.16254561857489308,-0.1811691962672853,-0.2001995899604026,-0.21959655375303555,-0.23931785837306735,-0.25931929117742814,-0.2795546561521974,-0.2999757739125698,-0.32053248170270765,-0.34117263339597914,-0.3618420994948906,-0.38248476713084756,-0.40304254006464363,-0.4234553386859261,-0.4436611000135485,-0.46359577769538873,-0.483193342008542,-0.5023857798591053,-0.5211030947822792,-0.5392733069424139,-0.5568224531329065,-0.5736745867762579,-0.589751777924107,-0.6049741132570944,-0.6192596960850778,-0.632524646347008,-0.6446831006107573,-0.6556472120734724,-0.6653271505614157,-0.6736311025297482,-0.6804652710629284,-0.6857338758744731,-0.689339153306878,-0.6911813563318902,-0.6911587545502471,-0.6891676341918014,-0.6851022981155552,-0.6788550658095346,-0.6703162733909721,-0.6593742736060904,-0.6459154358302271,-0.6298241460678924,-0.6109828069525293,-0.5892718377469237,-0.5645696743426925,-0.5367527692607041,-0.5056955916510105,-0.47127062729252883,-0.4333483785933822,-0.39179736459091147,-0.3464841209514134,-0.2972731999702091,-0.24402717057191695,-0.18660661831017933,-0.12487014536754941,-0.05867437055599112,0.01212607068350735,0.08767852628116088,0.1681323275376599,0.2536387891250115,0.34435120908576666,0.4404248688334974,0.5420170331528652,0.6492869501990981,0.7623958514986725,0.8815069519487224,1.0067854498173574,1.13839852674364,1.2765153477374043,1.42130706117964
1,1.572946798821863,1.7316096757870127,1.897472790568304,2.070715225030267,2.251518044408317,2.4400642973086635,2.636539015708513,2.841129214955754,3.0540238937694992,3.27541403423952,3.5054926018266315,3.7444545453624185,3.9924967970495344,4.249818272461312,4.516619870542331,4.793104473607627,5.079476947343528,5.37594414080711,5.682714886426311,6.0],"yaxis":"y","type":"scatter"},{"mode":"markers","x":[1.0,2.5,4.0,5.5,7.0],"y":[3.0,-0.13125,0.0,-0.65625,6.0],"type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.66
66666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"sca
ttermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333
333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"x"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"y"}},"legend":{"tracegroupgap":0},"showlegend":false,"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="f06e0b7d-4759-4fce-a07d-6e392ff3d540" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("f06e0b7d-4759-4fce-a07d-6e392ff3d540")) {                    Plotly.newPlot(                        "f06e0b7d-4759-4fce-a07d-6e392ff3d540",                        [{"hovertemplate":"x=%{x}<br>y=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.0502512
56281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.321608040201005,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.
366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.638190954773869,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"xaxis":"x","y":[3.0,2.8197775132646994,2.6468296407545298,2.480978457571409,2.3220480221881674,2.169864376448527,2.0242555455671196,1.8850515381294826,1.7520843460920474,1.6251879447821538,1.5041982928980473,1.3889533325088705,1.2792929890546703,1.175059171346399,1.076095771565909,0.9822486652659563,0.8933657113701969,0.809296752173205,0.7298936133404282,0.6550101039082478,0.5845020162839318,0.5182271262456482,0.45604519294247436,0.39781795889439875,0.34340914999228855,0.2926844754979413,0.24551162804404497,0.20176028363418083,0.1613021016428462,0.1240107248154402,0.08976177926825812,0.05843287448851129,0.029903603334304307,0.0040555420346265695,-0.019227749810596606,-0.04006072923056081,-0.0585558698835257,-0.07482366205689459,-0.08897261266714337,-0.10110924525984047,-0.11133810000965809,-0.119761733720361,-0.12648071982483203,-0.1315936483850237,-0.13519712609199247,-0.13738577626590426,-0.1382522388560119,-0.13788717044065493,-0.13637924422729383,-0.13381515005247593,-0.13027959438184666,-0.1258553003101497,-0.12062300756120407,-0.11466147248795551,-0.10804746807244214,-0.10085578392577758,-0.09315922628821909,-0.08502861802905386,-0.0765327986467014,-0.06773862426869641,-0.05871096765167181,-0.04951271818131318,-0.04020478187242702,-0.030846081368940757,-0.021493555943828825,-0.012202161499220664,-0.0030248705662870635,0.005987327694657552,0.014785427494228998,0.023322406413939234,0.03155322540619636,0.039434828794344415,0.046926144272549666,0.053988082905993905,0.06058353913065275,0.06667739075349459,0.0722364989522987,0.0772297082758655,0.08162784664377228,0.0854037253466231,0.0885321
390457932,0.09098986577366759,0.09275566693348196,0.0938102872993909,0.09413645501645647,0.09371888160064827,0.09254426193878089,0.09060127428864462,0.08788058027890884,0.08437482490911634,0.08007863654980837,0.07498862694224044,0.06910339119879154,0.062423507802634504,0.05495153860777009,0.04669202883931121,0.037651507093005424,0.027838485335746555,0.01726345890522225,0.005938906509970821,-0.006120709770470967,-0.018898944485715673,-0.03237736881442288,-0.04653557056435602,-0.061351154172564294,-0.07679974070485969,-0.0928549678563968,-0.10948848995135449,-0.12666997794297002,-0.14436711941368685,-0.16254561857489308,-0.1811691962672853,-0.2001995899604026,-0.21959655375303555,-0.23931785837306735,-0.25931929117742814,-0.2795546561521974,-0.2999757739125698,-0.32053248170270765,-0.34117263339597914,-0.3618420994948906,-0.38248476713084756,-0.40304254006464363,-0.4234553386859261,-0.4436611000135485,-0.46359577769538873,-0.483193342008542,-0.5023857798591053,-0.5211030947822792,-0.5392733069424139,-0.5568224531329065,-0.5736745867762579,-0.589751777924107,-0.6049741132570944,-0.6192596960850778,-0.632524646347008,-0.6446831006107573,-0.6556472120734724,-0.6653271505614157,-0.6736311025297482,-0.6804652710629284,-0.6857338758744731,-0.689339153306878,-0.6911813563318902,-0.6911587545502471,-0.6891676341918014,-0.6851022981155552,-0.6788550658095346,-0.6703162733909721,-0.6593742736060904,-0.6459154358302271,-0.6298241460678924,-0.6109828069525293,-0.5892718377469237,-0.5645696743426925,-0.5367527692607041,-0.5056955916510105,-0.47127062729252883,-0.4333483785933822,-0.39179736459091147,-0.3464841209514134,-0.2972731999702091,-0.24402717057191695,-0.18660661831017933,-0.12487014536754941,-0.05867437055599112,0.01212607068350735,0.08767852628116088,0.1681323275376599,0.2536387891250115,0.34435120908576666,0.4404248688334974,0.5420170331528652,0.6492869501990981,0.7623958514986725,0.8815069519487224,1.0067854498173574,1.13839852674364,1.2765153477374043,1.42130706117964
1,1.572946798821863,1.7316096757870127,1.897472790568304,2.070715225030267,2.251518044408317,2.4400642973086635,2.636539015708513,2.841129214955754,3.0540238937694992,3.27541403423952,3.5054926018266315,3.7444545453624185,3.9924967970495344,4.249818272461312,4.516619870542331,4.793104473607627,5.079476947343528,5.37594414080711,5.682714886426311,6.0],"yaxis":"y","type":"scatter"},{"mode":"markers","x":[1.0,2.5,4.0,5.5,7.0],"y":[3.0,-0.13125,0.0,-0.65625,6.0],"type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.66
66666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"sca
ttermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333
333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"x"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"y"}},"legend":{"tracegroupgap":0},"showlegend":false,"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('4cc07f0b-a295-45b8-afc7-ed8f63ada2ff');
+var gd = document.getElementById('f06e0b7d-4759-4fce-a07d-6e392ff3d540');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -1190,10 +1333,10 @@ <h4 data-number="13.2.1.1" class="anchored" data-anchor-id="the-naive-approach-g
 <li>It is <em>very</em> computationally inefficient, considering potentially vast numbers of guesses that are useless.</li>
 </ol>
 </section>
-<section id="scipy.optimize.minimize" class="level4" data-number="13.2.1.2">
-<h4 data-number="13.2.1.2" class="anchored" data-anchor-id="scipy.optimize.minimize"><span class="header-section-number">13.2.1.2</span> <code>Scipy.optimize.minimize</code></h4>
+<section id="scipy.optimize.minimize" class="level4" data-number="13.3.1.2">
+<h4 data-number="13.3.1.2" class="anchored" data-anchor-id="scipy.optimize.minimize"><span class="header-section-number">13.3.1.2</span> <code>scipy.optimize.minimize</code></h4>
 <p>One way to minimize this mathematical function is to use the <code>scipy.optimize.minimize</code> function. It takes a function and a starting guess and tries to find the minimum.</p>
-<div id="477268ef" class="cell" data-execution_count="17">
+<div id="5a5373a4" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> scipy.optimize <span class="im">import</span> minimize</span>
 <span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a><span class="co"># takes a function f and a starting point x0 and returns a readout </span></span>
@@ -1216,11 +1359,11 @@ <h4 data-number="13.2.1.2" class="anchored" data-anchor-id="scipy.optimize.minim
 <p>It turns out that under the hood, the <code>fit</code> method for <code>LinearRegression</code> models uses gradient descent. Gradient descent is also how much of machine learning works, including even advanced neural network models.</p>
 <p>In Data 100, the gradient descent process will usually be invisible to us, hidden beneath an abstraction layer. However, to be good data scientists, it’s important that we know the underlying principles that optimization functions harness to find optimal parameters.</p>
 </section>
-<section id="digging-into-gradient-descent" class="level4" data-number="13.2.1.3">
-<h4 data-number="13.2.1.3" class="anchored" data-anchor-id="digging-into-gradient-descent"><span class="header-section-number">13.2.1.3</span> Digging into Gradient Descent</h4>
+<section id="digging-into-gradient-descent" class="level4" data-number="13.3.1.3">
+<h4 data-number="13.3.1.3" class="anchored" data-anchor-id="digging-into-gradient-descent"><span class="header-section-number">13.3.1.3</span> Digging into Gradient Descent</h4>
 <p>Looking at the function across this domain, it is clear that the function’s minimum value occurs around <span class="math inline">\(\theta = 5.3\)</span>. Let’s pretend for a moment that we <em>couldn’t</em> see the full view of the cost function. How would we guess the value of <span class="math inline">\(\theta\)</span> that minimizes the function?</p>
 <p>It turns out that the first derivative of the function can give us a clue. In the graph below, the function and its derivative are plotted, with points where the derivative is equal to 0 plotted in light green.</p>
-<div id="23da0625" class="cell" data-execution_count="18">
+<div id="c038bada" class="cell" data-execution_count="18">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb31"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.graph_objects <span class="im">as</span> go</span>
@@ -1242,9 +1385,9 @@ <h4 data-number="13.2.1.3" class="anchored" data-anchor-id="digging-into-gradien
 <span id="cb31-17"><a href="#cb31-17" aria-hidden="true" tabindex="-1"></a>fig.show()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="79fed66a-228f-4c8c-8b93-d5dcbd7dc8f2" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("79fed66a-228f-4c8c-8b93-d5dcbd7dc8f2")) {                    Plotly.newPlot(                        "79fed66a-228f-4c8c-8b93-d5dcbd7dc8f2",                        [{"mode":"lines","name":"f","x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.050251256281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.32160804020100
5,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.6381909547738
69,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"y":[3.0,2.8197775132646994,2.6468296407545298,2.480978457571409,2.3220480221881674,2.169864376448527,2.0242555455671196,1.8850515381294826,1.7520843460920474,1.6251879447821538,1.5041982928980473,1.3889533325088705,1.2792929890546703,1.175059171346399,1.076095771565909,0.9822486652659563,0.8933657113701969,0.809296752173205,0.7298936133404282,0.6550101039082478,0.5845020162839318,0.5182271262456482,0.45604519294247436,0.39781795889439875,0.34340914999228855,0.2926844754979413,0.24551162804404497,0.20176028363418083,0.1613021016428462,0.1240107248154402,0.08976177926825812,0.05843287448851129,0.029903603334304307,0.0040555420346265695,-0.019227749810596606,-0.04006072923056081,-0.0585558698835257,-0.07482366205689459,-0.08897261266714337,-0.10110924525984047,-0.11133810000965809,-0.119761733720361,-0.12648071982483203,-0.1315936483850237,-0.13519712609199247,-0.13738577626590426,-0.1382522388560119,-0.13788717044065493,-0.13637924422729383,-0.13381515005247593,-0.13027959438184666,-0.1258553003101497,-0.12062300756120407,-0.11466147248795551,-0.10804746807244214,-0.10085578392577758,-0.09315922628821909,-0.08502861802905386,-0.0765327986467014,-0.06773862426869641,-0.05871096765167181,-0.04951271818131318,-0.04020478187242702,-0.030846081368940757,-0.021493555943828825,-0.012202161499220664,-0.0030248705662870635,0.005987327694657552,0.014785427494228998,0.023322406413939234,0.03155322540619636,0.039434828794344415,0.046926144272549666,0.053988082905993905,0.06058353913065275,0.06667739075349459,0.0722364989522987,0.0772297082758655,0.08162784664377228,0.0854037253466231,0.0885321390457932,0.09098986577366759,0.09275566693348196,0.0938102872993909,0.09413645501645647,0.09371888160064827,0.09254426193878089,0.09060127428864462,0.08788058027890884,0.084374824909116
34,0.08007863654980837,0.07498862694224044,0.06910339119879154,0.062423507802634504,0.05495153860777009,0.04669202883931121,0.037651507093005424,0.027838485335746555,0.01726345890522225,0.005938906509970821,-0.006120709770470967,-0.018898944485715673,-0.03237736881442288,-0.04653557056435602,-0.061351154172564294,-0.07679974070485969,-0.0928549678563968,-0.10948848995135449,-0.12666997794297002,-0.14436711941368685,-0.16254561857489308,-0.1811691962672853,-0.2001995899604026,-0.21959655375303555,-0.23931785837306735,-0.25931929117742814,-0.2795546561521974,-0.2999757739125698,-0.32053248170270765,-0.34117263339597914,-0.3618420994948906,-0.38248476713084756,-0.40304254006464363,-0.4234553386859261,-0.4436611000135485,-0.46359577769538873,-0.483193342008542,-0.5023857798591053,-0.5211030947822792,-0.5392733069424139,-0.5568224531329065,-0.5736745867762579,-0.589751777924107,-0.6049741132570944,-0.6192596960850778,-0.632524646347008,-0.6446831006107573,-0.6556472120734724,-0.6653271505614157,-0.6736311025297482,-0.6804652710629284,-0.6857338758744731,-0.689339153306878,-0.6911813563318902,-0.6911587545502471,-0.6891676341918014,-0.6851022981155552,-0.6788550658095346,-0.6703162733909721,-0.6593742736060904,-0.6459154358302271,-0.6298241460678924,-0.6109828069525293,-0.5892718377469237,-0.5645696743426925,-0.5367527692607041,-0.5056955916510105,-0.47127062729252883,-0.4333483785933822,-0.39179736459091147,-0.3464841209514134,-0.2972731999702091,-0.24402717057191695,-0.18660661831017933,-0.12487014536754941,-0.05867437055599112,0.01212607068350735,0.08767852628116088,0.1681323275376599,0.2536387891250115,0.34435120908576666,0.4404248688334974,0.5420170331528652,0.6492869501990981,0.7623958514986725,0.8815069519487224,1.0067854498173574,1.13839852674364,1.2765153477374043,1.421307061179641,1.572946798821863,1.7316096757870127,1.897472790568304,2.070715225030267,2.251518044408317,2.4400642973086635,2.636539015708513,2.841129214955754,3.0540238937694992,3.27541403423952,3.
5054926018266315,3.7444545453624185,3.9924967970495344,4.249818272461312,4.516619870542331,4.793104473607627,5.079476947343528,5.37594414080711,5.682714886426311,6.0],"type":"scatter"},{"line":{"dash":"dash"},"mode":"lines","name":"df","x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.050251256281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.321608040201005,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.
8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.638190954773869,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"y":[-6.1,-5.855752779706215,-5.617439626099488,-5.384994757378214,-5.158352391740783,-4.937446747385573,-4.722212042
51098,-4.512582495315394,-4.3084923239972,-4.109875746754784,-3.9166669817865367,-3.728800247290849,-3.546209761466102,-3.3688297425106897,-3.1965944086229996,-3.0294379780014196,-2.867294668844335,-2.710098699350141,-2.5577842877172143,-2.410285652143955,-2.2675370108287467,-2.129472581969978,-1.9960265837660303,-1.8671332344153029,-1.7427267521161809,-1.6227413550670406,-1.5071112614662923,-1.3957706895123068,-1.288653857403483,-1.1856949833381947,-1.0868282855148437,-0.9919879821318176,-0.9011082913874986,-0.8141234314802744,-0.7309676206085385,-0.6515750769706727,-0.5758800187650707,-0.5038166641901227,-0.4353192314442083,-0.37032193872572067,-0.30875900423305325,-0.2505646461645881,-0.1956730827187158,-0.14401853209381557,-0.09553521248829214,-0.050157342100516186,-0.007819139128892516,0.03154517822820253,0.0680013917723727,0.10161528330524447,0.1324526346284074,0.1605792275434908,0.18606084385210409,0.20896326535584536,0.22935227385634108,0.247293651155195,0.26285317905403077,0.2760966393544493,0.2870898138580628,0.295898484366478,0.30258843268132407,0.30722544060420204,0.30987528993671276,0.3106037624804969,0.3094766400371327,0.3065597044082608,0.3019187373954651,0.2956195208003862,0.28772783642461375,0.278309466069777,0.2674301915374713,0.2551557946293144,0.24155205714693012,0.2266847608919079,0.2106196876658714,0.19342261927042728,0.17515933750721616,0.15589562417781053,0.13569726108383975,0.11463003002689902,0.09275971280864041,0.07015209123063641,0.04687294709450498,0.022988062201864067,-0.0014367816456569925,-0.026335802646468665,-0.05164321899897004,-0.07729324890150338,-0.10322011055251892,-0.12935802215034756,-0.15564120189339975,-0.18200386798008594,-0.20838023860876548,-0.23470453197783173,-0.26091096628567245,-0.28693375973069807,-0.3127071305112679,-0.33816529682578106,-0.36324247687263095,-0.387872888850211,-0.4119907509568804,-0.4355302813910555,-0.4584256983510954,-0.48061122003543344,-0.5020210646424061,-0.5225894503704523,-0.5422505954179314,
-0.560938717983231,-0.5785880362647674,-0.5951327684608827,-0.6105071327699989,-0.6246453473904694,-0.6374816305207218,-0.648950200359127,-0.6589852751040894,-0.6675210729539799,-0.6744918121071806,-0.6798317107620733,-0.6834749871170857,-0.6853558593705884,-0.6854085457209521,-0.6835672643665817,-0.6797662335058476,-0.6739396713371661,-0.666021796058908,-0.6559468258694551,-0.643648978967201,-0.6290624735505503,-0.6121215278178852,-0.5927603599675649,-0.5709131881979829,-0.546514230707578,-0.5194977056946982,-0.48979783135773686,-0.4573488258950988,-0.42208490750512057,-0.38394029438626376,-0.3428492047368536,-0.29874585675530624,-0.2515644686400151,-0.20123925858937355,-0.1477044448017068,-0.09089424547549925,-0.030742878809076047,0.032815436999146644,0.09984648375078678,0.17041604324746232,0.24458989729080258,0.3224338276823687,0.4040136162238241,0.48939504471677536,0.5786438949628178,0.6718259487635351,0.7690069879205907,0.8702527942355687,0.9756291495100868,1.0852018355457402,1.1990366341441927,1.3171993271069824,1.439755696235784,1.5667715233321815,1.6983125901977814,1.834444678634202,1.9752335704430606,2.1207450474259644,2.2710448913845083,2.4261988841203674,2.58627280743508,2.7513324431302637,2.9214435730075934,3.0966719788686077,3.2770834425149475,3.462743745748264,3.6537186703701194,3.8500739981821313,4.051875510985894,4.259188990583084,4.472080218775238,4.690614977364044,4.914859048151049,5.144878212937897,5.380738253526147,5.622504951717474,5.870244089313519,6.124021448115786,6.383902809925985,6.649953956545687,6.922240669776488,7.200828731420051,7.485783923277927,7.777172027151755,8.075058824843131,8.37951009815372,8.690591628885068,9.00836919883884,9.332908589816588,9.664275583619997,10.002535962050592,10.347755506910039,10.7],"type":"scatter"},{"marker":{"size":12},"mode":"markers","name":"df = zero","x":[2.3927,3.5309,5.3263],"y":[0.0,0.0,0.0],"type":"scatter"}],                        
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"font":{"size":20},"yaxis":{"range":[-1,3]},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="ec7c80ba-c510-4716-bfd5-0ba8deee0118" class="plotly-graph-div" style="height:600px; width:800px;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("ec7c80ba-c510-4716-bfd5-0ba8deee0118")) {                    Plotly.newPlot(                        "ec7c80ba-c510-4716-bfd5-0ba8deee0118",                        [{"mode":"lines","name":"f","x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.050251256281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.32160804020100
5,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.6381909547738
69,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"y":[3.0,2.8197775132646994,2.6468296407545298,2.480978457571409,2.3220480221881674,2.169864376448527,2.0242555455671196,1.8850515381294826,1.7520843460920474,1.6251879447821538,1.5041982928980473,1.3889533325088705,1.2792929890546703,1.175059171346399,1.076095771565909,0.9822486652659563,0.8933657113701969,0.809296752173205,0.7298936133404282,0.6550101039082478,0.5845020162839318,0.5182271262456482,0.45604519294247436,0.39781795889439875,0.34340914999228855,0.2926844754979413,0.24551162804404497,0.20176028363418083,0.1613021016428462,0.1240107248154402,0.08976177926825812,0.05843287448851129,0.029903603334304307,0.0040555420346265695,-0.019227749810596606,-0.04006072923056081,-0.0585558698835257,-0.07482366205689459,-0.08897261266714337,-0.10110924525984047,-0.11133810000965809,-0.119761733720361,-0.12648071982483203,-0.1315936483850237,-0.13519712609199247,-0.13738577626590426,-0.1382522388560119,-0.13788717044065493,-0.13637924422729383,-0.13381515005247593,-0.13027959438184666,-0.1258553003101497,-0.12062300756120407,-0.11466147248795551,-0.10804746807244214,-0.10085578392577758,-0.09315922628821909,-0.08502861802905386,-0.0765327986467014,-0.06773862426869641,-0.05871096765167181,-0.04951271818131318,-0.04020478187242702,-0.030846081368940757,-0.021493555943828825,-0.012202161499220664,-0.0030248705662870635,0.005987327694657552,0.014785427494228998,0.023322406413939234,0.03155322540619636,0.039434828794344415,0.046926144272549666,0.053988082905993905,0.06058353913065275,0.06667739075349459,0.0722364989522987,0.0772297082758655,0.08162784664377228,0.0854037253466231,0.0885321390457932,0.09098986577366759,0.09275566693348196,0.0938102872993909,0.09413645501645647,0.09371888160064827,0.09254426193878089,0.09060127428864462,0.08788058027890884,0.084374824909116
34,0.08007863654980837,0.07498862694224044,0.06910339119879154,0.062423507802634504,0.05495153860777009,0.04669202883931121,0.037651507093005424,0.027838485335746555,0.01726345890522225,0.005938906509970821,-0.006120709770470967,-0.018898944485715673,-0.03237736881442288,-0.04653557056435602,-0.061351154172564294,-0.07679974070485969,-0.0928549678563968,-0.10948848995135449,-0.12666997794297002,-0.14436711941368685,-0.16254561857489308,-0.1811691962672853,-0.2001995899604026,-0.21959655375303555,-0.23931785837306735,-0.25931929117742814,-0.2795546561521974,-0.2999757739125698,-0.32053248170270765,-0.34117263339597914,-0.3618420994948906,-0.38248476713084756,-0.40304254006464363,-0.4234553386859261,-0.4436611000135485,-0.46359577769538873,-0.483193342008542,-0.5023857798591053,-0.5211030947822792,-0.5392733069424139,-0.5568224531329065,-0.5736745867762579,-0.589751777924107,-0.6049741132570944,-0.6192596960850778,-0.632524646347008,-0.6446831006107573,-0.6556472120734724,-0.6653271505614157,-0.6736311025297482,-0.6804652710629284,-0.6857338758744731,-0.689339153306878,-0.6911813563318902,-0.6911587545502471,-0.6891676341918014,-0.6851022981155552,-0.6788550658095346,-0.6703162733909721,-0.6593742736060904,-0.6459154358302271,-0.6298241460678924,-0.6109828069525293,-0.5892718377469237,-0.5645696743426925,-0.5367527692607041,-0.5056955916510105,-0.47127062729252883,-0.4333483785933822,-0.39179736459091147,-0.3464841209514134,-0.2972731999702091,-0.24402717057191695,-0.18660661831017933,-0.12487014536754941,-0.05867437055599112,0.01212607068350735,0.08767852628116088,0.1681323275376599,0.2536387891250115,0.34435120908576666,0.4404248688334974,0.5420170331528652,0.6492869501990981,0.7623958514986725,0.8815069519487224,1.0067854498173574,1.13839852674364,1.2765153477374043,1.421307061179641,1.572946798821863,1.7316096757870127,1.897472790568304,2.070715225030267,2.251518044408317,2.4400642973086635,2.636539015708513,2.841129214955754,3.0540238937694992,3.27541403423952,3.
5054926018266315,3.7444545453624185,3.9924967970495344,4.249818272461312,4.516619870542331,4.793104473607627,5.079476947343528,5.37594414080711,5.682714886426311,6.0],"type":"scatter"},{"line":{"dash":"dash"},"mode":"lines","name":"df","x":[1.0,1.0301507537688441,1.0603015075376885,1.0904522613065326,1.120603015075377,1.150753768844221,1.1809045226130652,1.2110552763819096,1.2412060301507537,1.271356783919598,1.3015075376884422,1.3316582914572863,1.3618090452261307,1.3919597989949748,1.4221105527638191,1.4522613065326633,1.4824120603015074,1.5125628140703518,1.542713567839196,1.5728643216080402,1.6030150753768844,1.6331658291457285,1.6633165829145728,1.6934673366834172,1.7236180904522613,1.7537688442211055,1.7839195979899496,1.814070351758794,1.8442211055276383,1.8743718592964824,1.9045226130653266,1.9346733668341707,1.964824120603015,1.9949748743718594,2.0251256281407035,2.0552763819095476,2.085427135678392,2.115577889447236,2.1457286432160805,2.1758793969849246,2.2060301507537687,2.2361809045226133,2.266331658291457,2.2964824120603016,2.3266331658291457,2.35678391959799,2.3869346733668344,2.417085427135678,2.4472361809045227,2.477386934673367,2.507537688442211,2.5376884422110555,2.567839195979899,2.5979899497487438,2.628140703517588,2.658291457286432,2.6884422110552766,2.7185929648241203,2.748743718592965,2.778894472361809,2.809045226130653,2.8391959798994977,2.8693467336683414,2.899497487437186,2.92964824120603,2.959798994974874,2.9899497487437188,3.020100502512563,3.050251256281407,3.080402010050251,3.1105527638190953,3.1407035175879394,3.170854271356784,3.201005025125628,3.2311557788944723,3.2613065326633164,3.2914572864321605,3.321608040201005,3.351758793969849,3.3819095477386933,3.4120603015075375,3.4422110552763816,3.472361809045226,3.5025125628140703,3.5326633165829144,3.5628140703517586,3.5929648241206027,3.6231155778894473,3.6532663316582914,3.6834170854271355,3.7135678391959797,3.743718592964824,3.7738693467336684,3.8040201005025125,3.8341708542713566,3.
8643216080402008,3.8944723618090453,3.9246231155778895,3.9547738693467336,3.9849246231155777,4.015075376884422,4.045226130653266,4.075376884422111,4.105527638190955,4.135678391959798,4.165829145728643,4.1959798994974875,4.226130653266331,4.256281407035176,4.28643216080402,4.316582914572864,4.346733668341709,4.376884422110553,4.407035175879397,4.4371859296482405,4.467336683417085,4.49748743718593,4.527638190954773,4.557788944723618,4.5879396984924625,4.618090452261306,4.648241206030151,4.678391959798995,4.708542713567839,4.738693467336683,4.768844221105527,4.798994974874372,4.829145728643216,4.85929648241206,4.889447236180905,4.919597989949748,4.949748743718593,4.9798994974874375,5.010050251256281,5.040201005025126,5.0703517587939695,5.100502512562814,5.130653266331658,5.160804020100502,5.190954773869347,5.221105527638191,5.251256281407035,5.281407035175879,5.311557788944723,5.341708542713568,5.371859296482412,5.402010050251256,5.4321608040201,5.4623115577889445,5.492462311557789,5.522613065326633,5.552763819095477,5.582914572864321,5.613065326633166,5.64321608040201,5.673366834170854,5.703517587939698,5.733668341708542,5.763819095477387,5.793969849246231,5.824120603015075,5.8542713567839195,5.884422110552763,5.914572864321608,5.944723618090452,5.974874371859296,6.005025125628141,6.035175879396984,6.065326633165829,6.0954773869346734,6.125628140703517,6.155778894472362,6.185929648241205,6.21608040201005,6.2462311557788945,6.276381909547738,6.306532663316583,6.3366834170854265,6.366834170854271,6.396984924623116,6.427135678391959,6.457286432160804,6.487437185929648,6.517587939698492,6.547738693467337,6.57788944723618,6.608040201005025,6.638190954773869,6.668341708542713,6.698492462311558,6.7286432160804015,6.758793969849246,6.788944723618091,6.819095477386934,6.849246231155779,6.879396984924623,6.909547738693467,6.939698492462312,6.969849246231155,7.0],"y":[-6.1,-5.855752779706215,-5.617439626099488,-5.384994757378214,-5.158352391740783,-4.937446747385573,-4.722212042
51098,-4.512582495315394,-4.3084923239972,-4.109875746754784,-3.9166669817865367,-3.728800247290849,-3.546209761466102,-3.3688297425106897,-3.1965944086229996,-3.0294379780014196,-2.867294668844335,-2.710098699350141,-2.5577842877172143,-2.410285652143955,-2.2675370108287467,-2.129472581969978,-1.9960265837660303,-1.8671332344153029,-1.7427267521161809,-1.6227413550670406,-1.5071112614662923,-1.3957706895123068,-1.288653857403483,-1.1856949833381947,-1.0868282855148437,-0.9919879821318176,-0.9011082913874986,-0.8141234314802744,-0.7309676206085385,-0.6515750769706727,-0.5758800187650707,-0.5038166641901227,-0.4353192314442083,-0.37032193872572067,-0.30875900423305325,-0.2505646461645881,-0.1956730827187158,-0.14401853209381557,-0.09553521248829214,-0.050157342100516186,-0.007819139128892516,0.03154517822820253,0.0680013917723727,0.10161528330524447,0.1324526346284074,0.1605792275434908,0.18606084385210409,0.20896326535584536,0.22935227385634108,0.247293651155195,0.26285317905403077,0.2760966393544493,0.2870898138580628,0.295898484366478,0.30258843268132407,0.30722544060420204,0.30987528993671276,0.3106037624804969,0.3094766400371327,0.3065597044082608,0.3019187373954651,0.2956195208003862,0.28772783642461375,0.278309466069777,0.2674301915374713,0.2551557946293144,0.24155205714693012,0.2266847608919079,0.2106196876658714,0.19342261927042728,0.17515933750721616,0.15589562417781053,0.13569726108383975,0.11463003002689902,0.09275971280864041,0.07015209123063641,0.04687294709450498,0.022988062201864067,-0.0014367816456569925,-0.026335802646468665,-0.05164321899897004,-0.07729324890150338,-0.10322011055251892,-0.12935802215034756,-0.15564120189339975,-0.18200386798008594,-0.20838023860876548,-0.23470453197783173,-0.26091096628567245,-0.28693375973069807,-0.3127071305112679,-0.33816529682578106,-0.36324247687263095,-0.387872888850211,-0.4119907509568804,-0.4355302813910555,-0.4584256983510954,-0.48061122003543344,-0.5020210646424061,-0.5225894503704523,-0.5422505954179314,
-0.560938717983231,-0.5785880362647674,-0.5951327684608827,-0.6105071327699989,-0.6246453473904694,-0.6374816305207218,-0.648950200359127,-0.6589852751040894,-0.6675210729539799,-0.6744918121071806,-0.6798317107620733,-0.6834749871170857,-0.6853558593705884,-0.6854085457209521,-0.6835672643665817,-0.6797662335058476,-0.6739396713371661,-0.666021796058908,-0.6559468258694551,-0.643648978967201,-0.6290624735505503,-0.6121215278178852,-0.5927603599675649,-0.5709131881979829,-0.546514230707578,-0.5194977056946982,-0.48979783135773686,-0.4573488258950988,-0.42208490750512057,-0.38394029438626376,-0.3428492047368536,-0.29874585675530624,-0.2515644686400151,-0.20123925858937355,-0.1477044448017068,-0.09089424547549925,-0.030742878809076047,0.032815436999146644,0.09984648375078678,0.17041604324746232,0.24458989729080258,0.3224338276823687,0.4040136162238241,0.48939504471677536,0.5786438949628178,0.6718259487635351,0.7690069879205907,0.8702527942355687,0.9756291495100868,1.0852018355457402,1.1990366341441927,1.3171993271069824,1.439755696235784,1.5667715233321815,1.6983125901977814,1.834444678634202,1.9752335704430606,2.1207450474259644,2.2710448913845083,2.4261988841203674,2.58627280743508,2.7513324431302637,2.9214435730075934,3.0966719788686077,3.2770834425149475,3.462743745748264,3.6537186703701194,3.8500739981821313,4.051875510985894,4.259188990583084,4.472080218775238,4.690614977364044,4.914859048151049,5.144878212937897,5.380738253526147,5.622504951717474,5.870244089313519,6.124021448115786,6.383902809925985,6.649953956545687,6.922240669776488,7.200828731420051,7.485783923277927,7.777172027151755,8.075058824843131,8.37951009815372,8.690591628885068,9.00836919883884,9.332908589816588,9.664275583619997,10.002535962050592,10.347755506910039,10.7],"type":"scatter"},{"marker":{"size":12},"mode":"markers","name":"df = zero","x":[2.3927,3.5309,5.3263],"y":[0.0,0.0,0.0],"type":"scatter"}],                        
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"font":{"size":20},"yaxis":{"range":[-1,3]},"autosize":false,"width":800,"height":600},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('79fed66a-228f-4c8c-8b93-d5dcbd7dc8f2');
+var gd = document.getElementById('ec7c80ba-c510-4716-bfd5-0ba8deee0118');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -1306,8 +1449,8 @@ <h4 data-number="13.2.1.3" class="anchored" data-anchor-id="digging-into-gradien
 <li>A positive slope means we want to step to the left, or move in the <em>negative</em> direction.</li>
 </ul>
 </section>
-<section id="algorithm-attempt-1" class="level4" data-number="13.2.1.4">
-<h4 data-number="13.2.1.4" class="anchored" data-anchor-id="algorithm-attempt-1"><span class="header-section-number">13.2.1.4</span> Algorithm Attempt 1</h4>
+<section id="algorithm-attempt-1" class="level4" data-number="13.3.1.4">
+<h4 data-number="13.3.1.4" class="anchored" data-anchor-id="algorithm-attempt-1"><span class="header-section-number">13.3.1.4</span> Algorithm Attempt 1</h4>
 <p>Armed with this knowledge, let’s try to see if we can use the derivative to optimize the function.</p>
 <p>We start by making some guess for the minimizing value of <span class="math inline">\(x\)</span>. Then, we look at the derivative of the function at this value of <span class="math inline">\(x\)</span>, and step downhill in the <em>opposite</em> direction. We can express our new rule as a recurrence relation:</p>
 <p><span class="math display">\[x^{(t+1)} = x^{(t)} - \frac{d}{dx} f(x^{(t)})\]</span></p>
@@ -1334,8 +1477,8 @@ <h4 data-number="13.2.1.4" class="anchored" data-anchor-id="algorithm-attempt-1"
 </div>
 <p>In other words, each step we take when updating our guess moves us too far. We can address this by decreasing the size of each step.</p>
 </section>
-<section id="algorithm-attempt-2" class="level4" data-number="13.2.1.5">
-<h4 data-number="13.2.1.5" class="anchored" data-anchor-id="algorithm-attempt-2"><span class="header-section-number">13.2.1.5</span> Algorithm Attempt 2</h4>
+<section id="algorithm-attempt-2" class="level4" data-number="13.3.1.5">
+<h4 data-number="13.3.1.5" class="anchored" data-anchor-id="algorithm-attempt-2"><span class="header-section-number">13.3.1.5</span> Algorithm Attempt 2</h4>
 <p>Let’s update our algorithm to use a <strong>learning rate</strong> (also sometimes called the step size), which controls how far we move with each update. We represent the learning rate with <span class="math inline">\(\alpha\)</span>.</p>
 <p><span class="math display">\[x^{(t+1)} = x^{(t)} - \alpha \frac{d}{dx} f(x^{(t)})\]</span></p>
 <p>A small <span class="math inline">\(\alpha\)</span> means that we will take small steps; a large <span class="math inline">\(\alpha\)</span> means we will take large steps. When do we stop updating? We stop updating either after a fixed number of updates or after a subsequent update doesn’t change much.</p>
@@ -1351,8 +1494,8 @@ <h4 data-number="13.2.1.5" class="anchored" data-anchor-id="algorithm-attempt-2"
 </div>
 </section>
 </section>
-<section id="convexity" class="level3" data-number="13.2.2">
-<h3 data-number="13.2.2" class="anchored" data-anchor-id="convexity"><span class="header-section-number">13.2.2</span> Convexity</h3>
+<section id="convexity" class="level3" data-number="13.3.2">
+<h3 data-number="13.3.2" class="anchored" data-anchor-id="convexity"><span class="header-section-number">13.3.2</span> Convexity</h3>
 <p>In our analysis above, we focused our attention on the global minimum of the loss function. You may be wondering: what about the local minimum that’s just to the left?</p>
 <p>If we had chosen a different starting guess for <span class="math inline">\(\theta\)</span>, or a different value for the learning rate <span class="math inline">\(\alpha\)</span>, our algorithm may have gotten “stuck” and converged on the local minimum, rather than on the true optimum value of loss.</p>
 <div data-align="middle">
@@ -1377,8 +1520,8 @@ <h3 data-number="13.2.2" class="anchored" data-anchor-id="convexity"><span class
 </div>
 <p>In summary, non-convex loss functions can cause problems with optimization. This means that our choice of loss function is a key factor in our modeling process. It turns out that MSE <em>is</em> convex, which is a major reason why it is such a popular choice of loss function. Gradient descent is only guaranteed to converge to the global minimum (given enough iterations and an appropriate step size) for convex functions.</p>
 </section>
-<section id="gradient-descent-in-1-dimension" class="level3" data-number="13.2.3">
-<h3 data-number="13.2.3" class="anchored" data-anchor-id="gradient-descent-in-1-dimension"><span class="header-section-number">13.2.3</span> Gradient Descent in 1 Dimension</h3>
+<section id="gradient-descent-in-1-dimension" class="level3" data-number="13.3.3">
+<h3 data-number="13.3.3" class="anchored" data-anchor-id="gradient-descent-in-1-dimension"><span class="header-section-number">13.3.3</span> Gradient Descent in 1 Dimension</h3>
 <blockquote class="blockquote">
 <p><strong>Terminology clarification</strong>: In past lectures, we have used “loss” to refer to the error incurred on a <em>single</em> datapoint. In applications, we usually care more about the average error across <em>all</em> datapoints. Going forward, we will take the “model’s loss” to mean the model’s average error across the dataset. This is sometimes also known as the empirical risk (R), cost function, or objective function. <span class="math display">\[L(\theta) = R(\theta) = \frac{1}{n} \sum_{i=1}^{n} L(y_i, \hat{y}_i)\]</span></p>
 </blockquote>
@@ -1397,15 +1540,15 @@ <h3 data-number="13.2.3" class="anchored" data-anchor-id="gradient-descent-in-1-
 </ol>
 <p>We can “translate” our gradient descent rule from before by replacing <span class="math inline">\(x\)</span> with <span class="math inline">\(\theta\)</span> and <span class="math inline">\(f\)</span> with <span class="math inline">\(L\)</span>:</p>
 <p><span class="math display">\[\theta^{(t+1)} = \theta^{(t)} - \alpha \frac{d}{d\theta} L(\theta^{(t)})\]</span></p>
-<section id="gradient-descent-on-the-tips-dataset" class="level4" data-number="13.2.3.1">
-<h4 data-number="13.2.3.1" class="anchored" data-anchor-id="gradient-descent-on-the-tips-dataset"><span class="header-section-number">13.2.3.1</span> Gradient Descent on the <code>tips</code> Dataset</h4>
+<section id="gradient-descent-on-the-tips-dataset" class="level4" data-number="13.3.3.1">
+<h4 data-number="13.3.3.1" class="anchored" data-anchor-id="gradient-descent-on-the-tips-dataset"><span class="header-section-number">13.3.3.1</span> Gradient Descent on the <code>tips</code> Dataset</h4>
 <p>To see this in action, let’s consider a case where we have a linear model with no offset. We want to predict the tip (y) given the price of a meal (x). To do this, we</p>
 <ul>
 <li>Choose a model: <span class="math inline">\(\hat{y} = \theta_1 x\)</span>,</li>
 <li>Choose a loss function: <span class="math inline">\(L(\theta) = MSE(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \theta_1x_i)^2\)</span>.</li>
 </ul>
 <p>Let’s apply our <code>gradient_descent</code> function from before to optimize our model on the <code>tips</code> dataset. We will try to select the best parameter <span class="math inline">\(\theta_1\)</span> to predict the <code>tip</code> <span class="math inline">\(y\)</span> from the <code>total_bill</code> <span class="math inline">\(x\)</span>.</p>
-<div id="554efe55" class="cell" data-execution_count="19">
+<div id="b286d7f2" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> sns.load_dataset(<span class="st">"tips"</span>)</span>
 <span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a>df.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="19">
@@ -1491,7 +1634,7 @@ <h4 data-number="13.2.3.1" class="anchored" data-anchor-id="gradient-descent-on-
 </ul>
 <p>for some learning rate <span class="math inline">\(\alpha\)</span>.</p>
 <p>Implementing this in code, we can visualize the MSE loss on the <code>tips</code> data. <strong>MSE is convex</strong>, so there is one global minimum.</p>
-<div id="bb2707a6" class="cell" data-execution_count="20">
+<div id="af6e2d80" class="cell" data-execution_count="20">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb33"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> gradient_descent(df, initial_guess, alpha, n):</span>
@@ -2050,529 +2193,620 @@ <h4 data-number="13.2.3.1" class="anchored" data-anchor-id="gradient-descent-on-
 <span id="cb35-48"><a href="#cb35-48" aria-hidden="true" tabindex="-1"></a>pd.options.mode.chained_assignment <span class="op">=</span> <span class="va">None</span>  <span class="co"># default='warn'</span></span>
 <span id="cb35-49"><a href="#cb35-49" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
 <span id="cb35-50"><a href="#cb35-50" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-51"><a href="#cb35-51" aria-hidden="true" tabindex="-1"></a><span class="fu">## `sklearn`</span></span>
-<span id="cb35-52"><a href="#cb35-52" aria-hidden="true" tabindex="-1"></a><span class="fu">### Implementing Derived Formulas in Code</span></span>
-<span id="cb35-53"><a href="#cb35-53" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-54"><a href="#cb35-54" aria-hidden="true" tabindex="-1"></a>Throughout this lecture, we'll refer to the <span class="in">`penguins`</span> dataset. </span>
+<span id="cb35-51"><a href="#cb35-51" aria-hidden="true" tabindex="-1"></a><span class="fu">## OLS Recap</span></span>
+<span id="cb35-52"><a href="#cb35-52" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-53"><a href="#cb35-53" aria-hidden="true" tabindex="-1"></a><span class="fu">### 1. Choose a model</span></span>
+<span id="cb35-54"><a href="#cb35-54" aria-hidden="true" tabindex="-1"></a>Recall that when using multiple linear regression, we can generate a prediction for each of our $n$ data points:</span>
 <span id="cb35-55"><a href="#cb35-55" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-58"><a href="#cb35-58" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-59"><a href="#cb35-59" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
-<span id="cb35-60"><a href="#cb35-60" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
-<span id="cb35-61"><a href="#cb35-61" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span>
-<span id="cb35-62"><a href="#cb35-62" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
-<span id="cb35-63"><a href="#cb35-63" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-64"><a href="#cb35-64" aria-hidden="true" tabindex="-1"></a>penguins <span class="op">=</span> sns.load_dataset(<span class="st">"penguins"</span>)</span>
-<span id="cb35-65"><a href="#cb35-65" aria-hidden="true" tabindex="-1"></a>penguins <span class="op">=</span> penguins[penguins[<span class="st">"species"</span>] <span class="op">==</span> <span class="st">"Adelie"</span>].dropna()</span>
-<span id="cb35-66"><a href="#cb35-66" aria-hidden="true" tabindex="-1"></a>penguins.head()</span>
-<span id="cb35-67"><a href="#cb35-67" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-68"><a href="#cb35-68" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-69"><a href="#cb35-69" aria-hidden="true" tabindex="-1"></a>Our goal will be to predict the value of the <span class="in">`"bill_depth_mm"`</span> for a particular penguin given its <span class="in">`"flipper_length_mm"`</span> and <span class="in">`"body_mass_g"`</span>. We'll also add a bias column of all ones to represent the intercept term of our models.</span>
-<span id="cb35-70"><a href="#cb35-70" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-73"><a href="#cb35-73" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-74"><a href="#cb35-74" aria-hidden="true" tabindex="-1"></a><span class="co"># Add a bias column of all ones to `penguins`</span></span>
-<span id="cb35-75"><a href="#cb35-75" aria-hidden="true" tabindex="-1"></a>penguins[<span class="st">"bias"</span>] <span class="op">=</span> np.ones(<span class="bu">len</span>(penguins), dtype<span class="op">=</span><span class="bu">int</span>) </span>
-<span id="cb35-76"><a href="#cb35-76" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-77"><a href="#cb35-77" aria-hidden="true" tabindex="-1"></a><span class="co"># Define the design matrix, X...</span></span>
-<span id="cb35-78"><a href="#cb35-78" aria-hidden="true" tabindex="-1"></a><span class="co"># Note that we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form</span></span>
-<span id="cb35-79"><a href="#cb35-79" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> penguins[[<span class="st">"bias"</span>, <span class="st">"flipper_length_mm"</span>, <span class="st">"body_mass_g"</span>]].to_numpy()</span>
+<span id="cb35-56"><a href="#cb35-56" aria-hidden="true" tabindex="-1"></a>$$\hat{y} =\theta_{0} + \theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}$$</span>
+<span id="cb35-57"><a href="#cb35-57" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-58"><a href="#cb35-58" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-59"><a href="#cb35-59" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-60"><a href="#cb35-60" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-61"><a href="#cb35-61" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/ols_matrices_old.png" alt='ols_matrices_old' width='600'&gt;</span>
+<span id="cb35-62"><a href="#cb35-62" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-63"><a href="#cb35-63" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-64"><a href="#cb35-64" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-65"><a href="#cb35-65" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-66"><a href="#cb35-66" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-67"><a href="#cb35-67" aria-hidden="true" tabindex="-1"></a>In the previous lecture, we used p+1 features to account for the intercept, $\theta_0$.  This makes slides and notation messy.  </span>
+<span id="cb35-68"><a href="#cb35-68" aria-hidden="true" tabindex="-1"></a>Let’s redefine **p as the number of columns in our covariate matrix** and **add a column of 1s** to encode the intercept (if desired). If we choose to add a column of 1s, then $x_1$ can be a 1 for every data point.</span>
+<span id="cb35-69"><a href="#cb35-69" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-70"><a href="#cb35-70" aria-hidden="true" tabindex="-1"></a>$$\hat{y} =\theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}$$</span>
+<span id="cb35-71"><a href="#cb35-71" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-72"><a href="#cb35-72" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-73"><a href="#cb35-73" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-74"><a href="#cb35-74" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-75"><a href="#cb35-75" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/ols_matrices_new.png" alt='ols_matrices_new' width='600'&gt;</span>
+<span id="cb35-76"><a href="#cb35-76" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-77"><a href="#cb35-77" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-78"><a href="#cb35-78" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-79"><a href="#cb35-79" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
 <span id="cb35-80"><a href="#cb35-80" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-81"><a href="#cb35-81" aria-hidden="true" tabindex="-1"></a><span class="co"># ...as well as the target variable, Y</span></span>
-<span id="cb35-82"><a href="#cb35-82" aria-hidden="true" tabindex="-1"></a><span class="co"># Again, we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form</span></span>
-<span id="cb35-83"><a href="#cb35-83" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[[<span class="st">"bill_depth_mm"</span>]].to_numpy()</span>
-<span id="cb35-84"><a href="#cb35-84" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-81"><a href="#cb35-81" aria-hidden="true" tabindex="-1"></a><span class="fu">### 2. Choose a loss function</span></span>
+<span id="cb35-82"><a href="#cb35-82" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-83"><a href="#cb35-83" aria-hidden="true" tabindex="-1"></a>Recall that we then choose the mean squared error loss function shown below where the prediction vector $\hat{\mathbb{Y}}$ depends on $\theta$.</span>
+<span id="cb35-84"><a href="#cb35-84" aria-hidden="true" tabindex="-1"></a>$$R(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2 = \frac{1}{n} (||\mathbb{Y} - \hat{\mathbb{Y}}||_2)^2$$</span>
 <span id="cb35-85"><a href="#cb35-85" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-86"><a href="#cb35-86" aria-hidden="true" tabindex="-1"></a>In the lecture on ordinary least squares, we expressed multiple linear regression using matrix notation.</span>
+<span id="cb35-86"><a href="#cb35-86" aria-hidden="true" tabindex="-1"></a><span class="fu">### 3. Fit the model</span></span>
 <span id="cb35-87"><a href="#cb35-87" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-88"><a href="#cb35-88" aria-hidden="true" tabindex="-1"></a>$$\hat{\mathbb{Y}} = \mathbb{X}\theta$$</span>
+<span id="cb35-88"><a href="#cb35-88" aria-hidden="true" tabindex="-1"></a>We can then minimize the average loss with calculus or geometry. See the previous lecture for a derivation of the Normal Equation ($\mathbb{X}^T \mathbb{X} \hat{\theta} = \mathbb{X}^T \mathbb{Y}$) using geometry. We can see what the matrices look like with our new interpretation where $\mathbb{X}$ is now an $n$ by $p$ matrix instead of an $n$ by $p+1$ matrix.</span>
 <span id="cb35-89"><a href="#cb35-89" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-90"><a href="#cb35-90" aria-hidden="true" tabindex="-1"></a>We used a geometric approach to derive the following expression for the optimal model parameters:</span>
-<span id="cb35-91"><a href="#cb35-91" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-92"><a href="#cb35-92" aria-hidden="true" tabindex="-1"></a>$$\hat{\theta} = (\mathbb{X}^T \mathbb{X})^{-1}\mathbb{X}^T \mathbb{Y}$$</span>
-<span id="cb35-93"><a href="#cb35-93" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-94"><a href="#cb35-94" aria-hidden="true" tabindex="-1"></a>That's a whole lot of matrix manipulation. How do we implement it in <span class="in">`python`</span>?</span>
-<span id="cb35-95"><a href="#cb35-95" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-96"><a href="#cb35-96" aria-hidden="true" tabindex="-1"></a>There are three operations we need to perform here: multiplying matrices, taking transposes, and finding inverses. </span>
-<span id="cb35-97"><a href="#cb35-97" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-98"><a href="#cb35-98" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To perform matrix multiplication, use the <span class="in">`@`</span> operator</span>
-<span id="cb35-99"><a href="#cb35-99" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To take a transpose, call the <span class="in">`.T`</span> attribute of an <span class="in">`NumPy`</span> array or <span class="in">`DataFrame`</span></span>
-<span id="cb35-100"><a href="#cb35-100" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To compute an inverse, use <span class="in">`NumPy`</span>'s in-built method <span class="in">`np.linalg.inv`</span></span>
-<span id="cb35-101"><a href="#cb35-101" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-102"><a href="#cb35-102" aria-hidden="true" tabindex="-1"></a>Putting this all together, we can compute the OLS estimate for the optimal model parameters, stored in the array <span class="in">`theta_hat`</span>.</span>
-<span id="cb35-103"><a href="#cb35-103" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-106"><a href="#cb35-106" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-107"><a href="#cb35-107" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: false</span></span>
-<span id="cb35-108"><a href="#cb35-108" aria-hidden="true" tabindex="-1"></a>theta_hat <span class="op">=</span> np.linalg.inv(X.T <span class="op">@</span> X) <span class="op">@</span> X.T <span class="op">@</span> Y</span>
-<span id="cb35-109"><a href="#cb35-109" aria-hidden="true" tabindex="-1"></a>theta_hat</span>
-<span id="cb35-110"><a href="#cb35-110" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-90"><a href="#cb35-90" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-91"><a href="#cb35-91" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-92"><a href="#cb35-92" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-93"><a href="#cb35-93" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/ols_solution_matrices.png" alt='ols_solution_matrices' width='400'&gt;</span>
+<span id="cb35-94"><a href="#cb35-94" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-95"><a href="#cb35-95" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-96"><a href="#cb35-96" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-97"><a href="#cb35-97" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-98"><a href="#cb35-98" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-99"><a href="#cb35-99" aria-hidden="true" tabindex="-1"></a>To summarize:</span>
+<span id="cb35-100"><a href="#cb35-100" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-101"><a href="#cb35-101" aria-hidden="true" tabindex="-1"></a>|   | Model | Estimate | Unique? |</span>
+<span id="cb35-102"><a href="#cb35-102" aria-hidden="true" tabindex="-1"></a>| -- | -- | -- |  -- | </span>
+<span id="cb35-103"><a href="#cb35-103" aria-hidden="true" tabindex="-1"></a>| Constant Model + MSE | $\hat{y} = \theta_0$| $\hat{\theta}_0 = mean(y) = \bar{y}$ | **Yes**. Any set of values has a unique mean.</span>
+<span id="cb35-104"><a href="#cb35-104" aria-hidden="true" tabindex="-1"></a>| Constant Model + MAE | $\hat{y} = \theta_0$  | $\hat{\theta}_0 = median(y)$ | **Yes**, if $n$ is odd. **No**, if $n$ is even; by convention, return the average of the middle 2 values.</span>
+<span id="cb35-105"><a href="#cb35-105" aria-hidden="true" tabindex="-1"></a>| Simple Linear Regression + MSE | $\hat{y} = \theta_0 + \theta_1x$| $\hat{\theta}_0 = \bar{y} - \hat{\theta}_1\bar{x}$ $\hat{\theta}_1 = r\frac{\sigma_y}{\sigma_x}$| **Yes**. Any set of non-constant* values has a unique mean, SD, and correlation coefficient.</span>
+<span id="cb35-106"><a href="#cb35-106" aria-hidden="true" tabindex="-1"></a>| **OLS** (Linear Model + MSE) | $\mathbb{\hat{Y}} = \mathbb{X}\mathbb{\theta}$| $\hat{\theta} = (\mathbb{X}^T\mathbb{X})^{-1}\mathbb{X}^T\mathbb{Y}$  | **Yes**, if $\mathbb{X}$ is full column rank (all columns are linearly independent, # of datapoints &gt;&gt;&gt; # of features).</span>
+<span id="cb35-107"><a href="#cb35-107" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-108"><a href="#cb35-108" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Uniqueness of a Solution</span></span>
+<span id="cb35-109"><a href="#cb35-109" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-110"><a href="#cb35-110" aria-hidden="true" tabindex="-1"></a>In most settings, the number of observations ($n$) is much greater than the number of features ($p$). Note that at least one solution always exists because intuitively, we can always draw a line of best fit for a given set of data, but there may be multiple lines that are “equally good”. (Formal proof is beyond this course.) Let's now revisit the interpretation for uniqueness of a solution at the end of the last lecture, but with the new notation of $p$ instead of $p+1$ features.</span>
 <span id="cb35-111"><a href="#cb35-111" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-112"><a href="#cb35-112" aria-hidden="true" tabindex="-1"></a>To make predictions using our optimized parameter values, we matrix-multiply the design matrix with the parameter vector:</span>
+<span id="cb35-112"><a href="#cb35-112" aria-hidden="true" tabindex="-1"></a>The Least Squares estimate $\hat{\theta}$ is **unique** if and only if $\mathbb{X}$ is **full column rank**.</span>
 <span id="cb35-113"><a href="#cb35-113" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-114"><a href="#cb35-114" aria-hidden="true" tabindex="-1"></a>$$\hat{\mathbb{Y}} = \mathbb{X}\theta$$</span>
-<span id="cb35-115"><a href="#cb35-115" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-118"><a href="#cb35-118" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-119"><a href="#cb35-119" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: false</span></span>
-<span id="cb35-120"><a href="#cb35-120" aria-hidden="true" tabindex="-1"></a>Y_hat <span class="op">=</span> X <span class="op">@</span> theta_hat</span>
-<span id="cb35-121"><a href="#cb35-121" aria-hidden="true" tabindex="-1"></a>pd.DataFrame(Y_hat).head()</span>
-<span id="cb35-122"><a href="#cb35-122" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-123"><a href="#cb35-123" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-124"><a href="#cb35-124" aria-hidden="true" tabindex="-1"></a><span class="fu">### The `sklearn` Workflow</span></span>
-<span id="cb35-125"><a href="#cb35-125" aria-hidden="true" tabindex="-1"></a>We've already saved a lot of time (and avoided tedious calculations) by translating our derived formulas into code. However, we still had to go through the process of writing out the linear algebra ourselves. </span>
-<span id="cb35-126"><a href="#cb35-126" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-127"><a href="#cb35-127" aria-hidden="true" tabindex="-1"></a>To make life *even easier*, we can turn to the <span class="in">`sklearn`</span> <span class="co">[</span><span class="ot">`python` library</span><span class="co">](https://scikit-learn.org/stable/)</span>. <span class="in">`sklearn`</span> is a robust library of machine learning tools used extensively in research and industry. It is the standard for simple machine learning tasks and gives us a wide variety of in-built modeling frameworks and methods, so we'll keep returning to <span class="in">`sklearn`</span> techniques as we progress through Data 100. </span>
-<span id="cb35-128"><a href="#cb35-128" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-129"><a href="#cb35-129" aria-hidden="true" tabindex="-1"></a>Regardless of the specific type of model being implemented, <span class="in">`sklearn`</span> follows a standard set of steps for creating a model: </span>
-<span id="cb35-130"><a href="#cb35-130" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-131"><a href="#cb35-131" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Import the <span class="in">`LinearRegression`</span> model from <span class="in">`sklearn`</span></span>
+<span id="cb35-114"><a href="#cb35-114" aria-hidden="true" tabindex="-1"></a>::: {.callout}</span>
+<span id="cb35-115"><a href="#cb35-115" aria-hidden="true" tabindex="-1"></a>Proof: </span>
+<span id="cb35-116"><a href="#cb35-116" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-117"><a href="#cb35-117" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>We know the solution to the normal equation $\mathbb{X}^T\mathbb{X}\hat{\theta} = \mathbb{X}^T\mathbb{Y}$ is the least squares estimate that minimizes the squared loss.</span>
+<span id="cb35-118"><a href="#cb35-118" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>The normal equation has a **unique** solution $\hat{\theta}$ $\iff$ the square matrix $\mathbb{X}^T\mathbb{X}$ is **invertible** $\iff$ $\mathbb{X}^T\mathbb{X}$ is full rank.</span>
+<span id="cb35-119"><a href="#cb35-119" aria-hidden="true" tabindex="-1"></a><span class="ss">  * </span>The **column rank** of a square matrix is the max number of linearly independent columns it contains.</span>
+<span id="cb35-120"><a href="#cb35-120" aria-hidden="true" tabindex="-1"></a><span class="ss">  * </span>An $n$ x $n$ square matrix is deemed full column rank when all of its columns are linearly independent. That is, its rank would be equal to $n$.</span>
+<span id="cb35-121"><a href="#cb35-121" aria-hidden="true" tabindex="-1"></a><span class="ss">  * </span>$\mathbb{X}^T\mathbb{X}$ has shape $p \times p$, and therefore has max rank $p$. </span>
+<span id="cb35-122"><a href="#cb35-122" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>$rank(\mathbb{X}^T\mathbb{X})$ = $rank(\mathbb{X})$ (proof out of scope).</span>
+<span id="cb35-123"><a href="#cb35-123" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Therefore, $\mathbb{X}^T\mathbb{X}$ has rank $p$ $\iff$  $\mathbb{X}$ has rank $p$ $\iff \mathbb{X}$ is full column rank.</span>
+<span id="cb35-124"><a href="#cb35-124" aria-hidden="true" tabindex="-1"></a>:::</span>
+<span id="cb35-125"><a href="#cb35-125" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-126"><a href="#cb35-126" aria-hidden="true" tabindex="-1"></a>Therefore, if $\mathbb{X}$ is not full column rank, we will not have unique estimates. This can happen for two major reasons.</span>
+<span id="cb35-127"><a href="#cb35-127" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-128"><a href="#cb35-128" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>If our design matrix $\mathbb{X}$ is "**wide**":</span>
+<span id="cb35-129"><a href="#cb35-129" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>If n &lt; p, then we have more features (columns) than observations (rows).</span>
+<span id="cb35-130"><a href="#cb35-130" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>Then $rank(\mathbb{X}) \leq \min(n, p) = n &lt; p$, so $\mathbb{X}$ cannot be full column rank and $\hat{\theta}$ is not unique.</span>
+<span id="cb35-131"><a href="#cb35-131" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>Typically we have n &gt;&gt; p so this is less of an issue.</span>
 <span id="cb35-132"><a href="#cb35-132" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-133"><a href="#cb35-133" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
-<span id="cb35-134"><a href="#cb35-134" aria-hidden="true" tabindex="-1"></a><span class="in">    from sklearn.linear_model import LinearRegression</span></span>
-<span id="cb35-135"><a href="#cb35-135" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
-<span id="cb35-136"><a href="#cb35-136" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-137"><a href="#cb35-137" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Create a model object. This generates a new instance of the model class. You can think of it as making a new "copy" of a standard "template" for a model. In code, this looks like:</span>
+<span id="cb35-133"><a href="#cb35-133" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>If our design matrix $\mathbb{X}$ has features that are **linear combinations** of other features:</span>
+<span id="cb35-134"><a href="#cb35-134" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>By definition, the rank of $\mathbb{X}$ is the number of linearly independent columns in $\mathbb{X}$.</span>
+<span id="cb35-135"><a href="#cb35-135" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>Example: If “Width”, “Height”, and “Perimeter” are all columns,</span>
+<span id="cb35-136"><a href="#cb35-136" aria-hidden="true" tabindex="-1"></a><span class="ss">      * </span>Perimeter = 2 * Width + 2 * Height  $\rightarrow$  $\mathbb{X}$ is not full rank.</span>
+<span id="cb35-137"><a href="#cb35-137" aria-hidden="true" tabindex="-1"></a><span class="ss">    * </span>Important with one-hot encoding (to discuss later).</span>
 <span id="cb35-138"><a href="#cb35-138" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-139"><a href="#cb35-139" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
-<span id="cb35-140"><a href="#cb35-140" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model = LinearRegression()</span></span>
-<span id="cb35-141"><a href="#cb35-141" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
-<span id="cb35-142"><a href="#cb35-142" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-143"><a href="#cb35-143" aria-hidden="true" tabindex="-1"></a>    </span>
-<span id="cb35-144"><a href="#cb35-144" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Fit the model to the <span class="in">`X`</span> design matrix and <span class="in">`Y`</span> target vector. This calculates the optimal model parameters "behind the scenes" without us explicitly working through the calculations ourselves. The fitted parameters are then stored within the model for use in future predictions:</span>
-<span id="cb35-145"><a href="#cb35-145" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-146"><a href="#cb35-146" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
-<span id="cb35-147"><a href="#cb35-147" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model.fit(X, Y)</span></span>
-<span id="cb35-148"><a href="#cb35-148" aria-hidden="true" tabindex="-1"></a><span class="in">     ```</span></span>
-<span id="cb35-149"><a href="#cb35-149" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-150"><a href="#cb35-150" aria-hidden="true" tabindex="-1"></a>    </span>
-<span id="cb35-151"><a href="#cb35-151" aria-hidden="true" tabindex="-1"></a><span class="ss">4. </span>Use the fitted model to make predictions on the <span class="in">`X`</span> input data using <span class="in">`.predict`</span>. </span>
-<span id="cb35-152"><a href="#cb35-152" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-153"><a href="#cb35-153" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
-<span id="cb35-154"><a href="#cb35-154" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model.predict(X)</span></span>
-<span id="cb35-155"><a href="#cb35-155" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
-<span id="cb35-156"><a href="#cb35-156" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-157"><a href="#cb35-157" aria-hidden="true" tabindex="-1"></a>To extract the fitted parameters, we can use:</span>
-<span id="cb35-158"><a href="#cb35-158" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-159"><a href="#cb35-159" aria-hidden="true" tabindex="-1"></a>  <span class="in">```</span></span>
-<span id="cb35-160"><a href="#cb35-160" aria-hidden="true" tabindex="-1"></a><span class="in">  my_model.coef_</span></span>
+<span id="cb35-139"><a href="#cb35-139" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-140"><a href="#cb35-140" aria-hidden="true" tabindex="-1"></a>Let's now explore how to use the normal equations with a real-world dataset in the next section.</span>
+<span id="cb35-141"><a href="#cb35-141" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-142"><a href="#cb35-142" aria-hidden="true" tabindex="-1"></a><span class="fu">## `sklearn`</span></span>
+<span id="cb35-143"><a href="#cb35-143" aria-hidden="true" tabindex="-1"></a><span class="fu">### Implementing Derived Formulas in Code</span></span>
+<span id="cb35-144"><a href="#cb35-144" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-145"><a href="#cb35-145" aria-hidden="true" tabindex="-1"></a>Throughout this lecture, we'll refer to the <span class="in">`penguins`</span> dataset. </span>
+<span id="cb35-146"><a href="#cb35-146" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-149"><a href="#cb35-149" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-150"><a href="#cb35-150" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
+<span id="cb35-151"><a href="#cb35-151" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
+<span id="cb35-152"><a href="#cb35-152" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span>
+<span id="cb35-153"><a href="#cb35-153" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
+<span id="cb35-154"><a href="#cb35-154" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-155"><a href="#cb35-155" aria-hidden="true" tabindex="-1"></a>penguins <span class="op">=</span> sns.load_dataset(<span class="st">"penguins"</span>)</span>
+<span id="cb35-156"><a href="#cb35-156" aria-hidden="true" tabindex="-1"></a>penguins <span class="op">=</span> penguins[penguins[<span class="st">"species"</span>] <span class="op">==</span> <span class="st">"Adelie"</span>].dropna()</span>
+<span id="cb35-157"><a href="#cb35-157" aria-hidden="true" tabindex="-1"></a>penguins.head()</span>
+<span id="cb35-158"><a href="#cb35-158" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-159"><a href="#cb35-159" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-160"><a href="#cb35-160" aria-hidden="true" tabindex="-1"></a>Our goal will be to predict the value of the <span class="in">`"bill_depth_mm"`</span> for a particular penguin given its <span class="in">`"flipper_length_mm"`</span> and <span class="in">`"body_mass_g"`</span>. We'll also add a bias column of all ones to represent the intercept term of our models.</span>
 <span id="cb35-161"><a href="#cb35-161" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-162"><a href="#cb35-162" aria-hidden="true" tabindex="-1"></a><span class="in">  my_model.intercept_</span></span>
-<span id="cb35-163"><a href="#cb35-163" aria-hidden="true" tabindex="-1"></a><span class="in">  ```</span></span>
-<span id="cb35-164"><a href="#cb35-164" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-165"><a href="#cb35-165" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-166"><a href="#cb35-166" aria-hidden="true" tabindex="-1"></a>Let's put this into action with our multiple regression task!</span>
+<span id="cb35-164"><a href="#cb35-164" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-165"><a href="#cb35-165" aria-hidden="true" tabindex="-1"></a><span class="co"># Add a bias column of all ones to `penguins`</span></span>
+<span id="cb35-166"><a href="#cb35-166" aria-hidden="true" tabindex="-1"></a>penguins[<span class="st">"bias"</span>] <span class="op">=</span> np.ones(<span class="bu">len</span>(penguins), dtype<span class="op">=</span><span class="bu">int</span>) </span>
 <span id="cb35-167"><a href="#cb35-167" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-168"><a href="#cb35-168" aria-hidden="true" tabindex="-1"></a>**1. Initialize an instance of the model class**</span>
-<span id="cb35-169"><a href="#cb35-169" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-170"><a href="#cb35-170" aria-hidden="true" tabindex="-1"></a><span class="in">`sklearn`</span> stores "templates" of useful models for machine learning. We begin the modeling process by making a "copy" of one of these templates for our own use. Model initialization looks like <span class="in">`ModelClass()`</span>, where <span class="in">`ModelClass`</span> is the type of model we wish to create.</span>
+<span id="cb35-168"><a href="#cb35-168" aria-hidden="true" tabindex="-1"></a><span class="co"># Define the design matrix, X...</span></span>
+<span id="cb35-169"><a href="#cb35-169" aria-hidden="true" tabindex="-1"></a><span class="co"># Note that we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form</span></span>
+<span id="cb35-170"><a href="#cb35-170" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> penguins[[<span class="st">"bias"</span>, <span class="st">"flipper_length_mm"</span>, <span class="st">"body_mass_g"</span>]].to_numpy()</span>
 <span id="cb35-171"><a href="#cb35-171" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-172"><a href="#cb35-172" aria-hidden="true" tabindex="-1"></a>For now, let's create a linear regression model using <span class="in">`LinearRegression`</span>. </span>
-<span id="cb35-173"><a href="#cb35-173" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-174"><a href="#cb35-174" aria-hidden="true" tabindex="-1"></a><span class="in">`my_model`</span> is now an instance of the <span class="in">`LinearRegression`</span> class. You can think of it as the "idea" of a linear regression model. We haven't trained it yet, so it doesn't know any model parameters and cannot be used to make predictions. In fact, we haven't even told it what data to use for modeling! It simply waits for further instructions.</span>
-<span id="cb35-175"><a href="#cb35-175" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-178"><a href="#cb35-178" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-179"><a href="#cb35-179" aria-hidden="true" tabindex="-1"></a>my_model <span class="op">=</span> LinearRegression()</span>
-<span id="cb35-180"><a href="#cb35-180" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-181"><a href="#cb35-181" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-182"><a href="#cb35-182" aria-hidden="true" tabindex="-1"></a>**2. Train the model using `.fit`**</span>
-<span id="cb35-183"><a href="#cb35-183" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-184"><a href="#cb35-184" aria-hidden="true" tabindex="-1"></a>Before the model can make predictions, we will need to fit it to our training data. When we fit the model, <span class="in">`sklearn`</span> will run gradient descent behind the scenes to determine the optimal model parameters. It will then save these model parameters to our model instance for future use. </span>
-<span id="cb35-185"><a href="#cb35-185" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-186"><a href="#cb35-186" aria-hidden="true" tabindex="-1"></a>All <span class="in">`sklearn`</span> model classes include a <span class="in">`.fit`</span> method, which is used to fit the model. It takes in two inputs: the design matrix, <span class="in">`X`</span>, and the target variable, <span class="in">`Y`</span>. </span>
-<span id="cb35-187"><a href="#cb35-187" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-188"><a href="#cb35-188" aria-hidden="true" tabindex="-1"></a>Let's start by fitting a model with just one feature: the flipper length. We create a design matrix <span class="in">`X`</span> by pulling out the <span class="in">`"flipper_length_mm"`</span> column from the <span class="in">`DataFrame`</span>. </span>
-<span id="cb35-189"><a href="#cb35-189" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-192"><a href="#cb35-192" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-193"><a href="#cb35-193" aria-hidden="true" tabindex="-1"></a><span class="co"># .fit expects a 2D data design matrix, so we use double brackets to extract a DataFrame</span></span>
-<span id="cb35-194"><a href="#cb35-194" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> penguins[[<span class="st">"flipper_length_mm"</span>]]</span>
-<span id="cb35-195"><a href="#cb35-195" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[<span class="st">"bill_depth_mm"</span>]</span>
-<span id="cb35-196"><a href="#cb35-196" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-197"><a href="#cb35-197" aria-hidden="true" tabindex="-1"></a>my_model.fit(X, Y)</span>
-<span id="cb35-198"><a href="#cb35-198" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-199"><a href="#cb35-199" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-200"><a href="#cb35-200" aria-hidden="true" tabindex="-1"></a>Notice that we use **double brackets** to extract this column. Why double brackets instead of just single brackets? The `.fit` method, by default, expects to receive **2-dimensional** data – some kind of data that includes both rows and columns. Writing <span class="in">`penguins["flipper_length_mm"]`</span> would return a 1D <span class="in">`Series`</span>, causing <span class="in">`sklearn`</span> to error. We avoid this by writing <span class="in">`penguins[["flipper_length_mm"]]`</span> to produce a 2D <span class="in">`DataFrame`</span>. </span>
-<span id="cb35-201"><a href="#cb35-201" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-202"><a href="#cb35-202" aria-hidden="true" tabindex="-1"></a>And in just three lines of code, our model has run gradient descent to determine the optimal model parameters! Our single-feature model takes the form:</span>
-<span id="cb35-203"><a href="#cb35-203" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-204"><a href="#cb35-204" aria-hidden="true" tabindex="-1"></a>$$\text{bill depth} = \theta_0 + \theta_1 \text{flipper length}$$</span>
-<span id="cb35-205"><a href="#cb35-205" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-206"><a href="#cb35-206" aria-hidden="true" tabindex="-1"></a>Note that <span class="in">`LinearRegression`</span> will automatically include an intercept term. </span>
-<span id="cb35-207"><a href="#cb35-207" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-208"><a href="#cb35-208" aria-hidden="true" tabindex="-1"></a>The fitted model parameters are stored as attributes of the model instance. <span class="in">`my_model.intercept_`</span> will return the value of $\hat{\theta}_0$ as a scalar. `my_model.coef_` will return all values $\hat{\theta}_1, </span>
-<span id="cb35-209"><a href="#cb35-209" aria-hidden="true" tabindex="-1"></a>\hat{\theta}_1, ...$ in an array. Because our model only contains one feature, we see just the value of $\hat{\theta}_1$ in the cell below.</span>
-<span id="cb35-210"><a href="#cb35-210" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-213"><a href="#cb35-213" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-214"><a href="#cb35-214" aria-hidden="true" tabindex="-1"></a><span class="co"># The intercept term, theta_0</span></span>
-<span id="cb35-215"><a href="#cb35-215" aria-hidden="true" tabindex="-1"></a>my_model.intercept_</span>
-<span id="cb35-216"><a href="#cb35-216" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-172"><a href="#cb35-172" aria-hidden="true" tabindex="-1"></a><span class="co"># ...as well as the target variable, Y</span></span>
+<span id="cb35-173"><a href="#cb35-173" aria-hidden="true" tabindex="-1"></a><span class="co"># Again, we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form</span></span>
+<span id="cb35-174"><a href="#cb35-174" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[[<span class="st">"bill_depth_mm"</span>]].to_numpy()</span>
+<span id="cb35-175"><a href="#cb35-175" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-176"><a href="#cb35-176" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-177"><a href="#cb35-177" aria-hidden="true" tabindex="-1"></a>In the lecture on ordinary least squares, we expressed multiple linear regression using matrix notation.</span>
+<span id="cb35-178"><a href="#cb35-178" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-179"><a href="#cb35-179" aria-hidden="true" tabindex="-1"></a>$$\hat{\mathbb{Y}} = \mathbb{X}\theta$$</span>
+<span id="cb35-180"><a href="#cb35-180" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-181"><a href="#cb35-181" aria-hidden="true" tabindex="-1"></a>We used a geometric approach to derive the following expression for the optimal model parameters:</span>
+<span id="cb35-182"><a href="#cb35-182" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-183"><a href="#cb35-183" aria-hidden="true" tabindex="-1"></a>$$\hat{\theta} = (\mathbb{X}^T \mathbb{X})^{-1}\mathbb{X}^T \mathbb{Y}$$</span>
+<span id="cb35-184"><a href="#cb35-184" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-185"><a href="#cb35-185" aria-hidden="true" tabindex="-1"></a>That's a whole lot of matrix manipulation. How do we implement it in <span class="in">`python`</span>?</span>
+<span id="cb35-186"><a href="#cb35-186" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-187"><a href="#cb35-187" aria-hidden="true" tabindex="-1"></a>There are three operations we need to perform here: multiplying matrices, taking transposes, and finding inverses. </span>
+<span id="cb35-188"><a href="#cb35-188" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-189"><a href="#cb35-189" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To perform matrix multiplication, use the <span class="in">`@`</span> operator</span>
+<span id="cb35-190"><a href="#cb35-190" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To take a transpose, call the <span class="in">`.T`</span> attribute of a <span class="in">`NumPy`</span> array or <span class="in">`DataFrame`</span></span>
+<span id="cb35-191"><a href="#cb35-191" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>To compute an inverse, use <span class="in">`NumPy`</span>'s in-built method <span class="in">`np.linalg.inv`</span></span>
+<span id="cb35-192"><a href="#cb35-192" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-193"><a href="#cb35-193" aria-hidden="true" tabindex="-1"></a>Putting this all together, we can compute the OLS estimate for the optimal model parameters, stored in the array <span class="in">`theta_hat`</span>.</span>
+<span id="cb35-194"><a href="#cb35-194" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-197"><a href="#cb35-197" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-198"><a href="#cb35-198" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: false</span></span>
+<span id="cb35-199"><a href="#cb35-199" aria-hidden="true" tabindex="-1"></a>theta_hat <span class="op">=</span> np.linalg.inv(X.T <span class="op">@</span> X) <span class="op">@</span> X.T <span class="op">@</span> Y</span>
+<span id="cb35-200"><a href="#cb35-200" aria-hidden="true" tabindex="-1"></a>theta_hat</span>
+<span id="cb35-201"><a href="#cb35-201" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-202"><a href="#cb35-202" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-203"><a href="#cb35-203" aria-hidden="true" tabindex="-1"></a>To make predictions using our optimized parameter values, we matrix-multiply the design matrix with the parameter vector:</span>
+<span id="cb35-204"><a href="#cb35-204" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-205"><a href="#cb35-205" aria-hidden="true" tabindex="-1"></a>$$\hat{\mathbb{Y}} = \mathbb{X}\hat{\theta}$$</span>
+<span id="cb35-206"><a href="#cb35-206" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-209"><a href="#cb35-209" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-210"><a href="#cb35-210" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: false</span></span>
+<span id="cb35-211"><a href="#cb35-211" aria-hidden="true" tabindex="-1"></a>Y_hat <span class="op">=</span> X <span class="op">@</span> theta_hat</span>
+<span id="cb35-212"><a href="#cb35-212" aria-hidden="true" tabindex="-1"></a>pd.DataFrame(Y_hat).head()</span>
+<span id="cb35-213"><a href="#cb35-213" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-214"><a href="#cb35-214" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-215"><a href="#cb35-215" aria-hidden="true" tabindex="-1"></a><span class="fu">### The `sklearn` Workflow</span></span>
+<span id="cb35-216"><a href="#cb35-216" aria-hidden="true" tabindex="-1"></a>We've already saved a lot of time (and avoided tedious calculations) by translating our derived formulas into code. However, we still had to go through the process of writing out the linear algebra ourselves. </span>
 <span id="cb35-217"><a href="#cb35-217" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-220"><a href="#cb35-220" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-221"><a href="#cb35-221" aria-hidden="true" tabindex="-1"></a><span class="co"># All parameters theta_1, ..., theta_p</span></span>
-<span id="cb35-222"><a href="#cb35-222" aria-hidden="true" tabindex="-1"></a>my_model.coef_</span>
-<span id="cb35-223"><a href="#cb35-223" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-224"><a href="#cb35-224" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-225"><a href="#cb35-225" aria-hidden="true" tabindex="-1"></a>**3. Use the fitted model to make predictions**</span>
-<span id="cb35-226"><a href="#cb35-226" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-227"><a href="#cb35-227" aria-hidden="true" tabindex="-1"></a>Now that the model has been trained, we can use it to make predictions! To do so, we use the <span class="in">`.predict`</span> method. <span class="in">`.predict`</span> takes in one argument: the design matrix that should be used to generate predictions. To understand how the model performs on the training set, we would pass in the training data. Alternatively, to make predictions on unseen data, we would pass in a new dataset that wasn't used to train the model.</span>
-<span id="cb35-228"><a href="#cb35-228" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-229"><a href="#cb35-229" aria-hidden="true" tabindex="-1"></a>Below, we call <span class="in">`.predict`</span> to generate model predictions on the original training data. As before, we use double brackets to ensure that we extract 2-dimensional data.</span>
-<span id="cb35-230"><a href="#cb35-230" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-233"><a href="#cb35-233" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-234"><a href="#cb35-234" aria-hidden="true" tabindex="-1"></a>Y_hat_one_feature <span class="op">=</span> my_model.predict(penguins[[<span class="st">"flipper_length_mm"</span>]])</span>
-<span id="cb35-235"><a href="#cb35-235" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-236"><a href="#cb35-236" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"The RMSE of the model is </span><span class="sc">{</span>np<span class="sc">.</span>sqrt(np.mean((Y<span class="op">-</span>Y_hat_one_feature)<span class="op">**</span><span class="dv">2</span>))<span class="sc">}</span><span class="ss">"</span>)</span>
-<span id="cb35-237"><a href="#cb35-237" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-238"><a href="#cb35-238" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-239"><a href="#cb35-239" aria-hidden="true" tabindex="-1"></a>What if we wanted a model with two features? </span>
+<span id="cb35-218"><a href="#cb35-218" aria-hidden="true" tabindex="-1"></a>To make life *even easier*, we can turn to the <span class="in">`sklearn`</span> <span class="co">[</span><span class="ot">`python` library</span><span class="co">](https://scikit-learn.org/stable/)</span>. <span class="in">`sklearn`</span> is a robust library of machine learning tools used extensively in research and industry. It is the standard for simple machine learning tasks and gives us a wide variety of in-built modeling frameworks and methods, so we'll keep returning to <span class="in">`sklearn`</span> techniques as we progress through Data 100. </span>
+<span id="cb35-219"><a href="#cb35-219" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-220"><a href="#cb35-220" aria-hidden="true" tabindex="-1"></a>Regardless of the specific type of model being implemented, <span class="in">`sklearn`</span> follows a standard set of steps for creating a model: </span>
+<span id="cb35-221"><a href="#cb35-221" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-222"><a href="#cb35-222" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Import the <span class="in">`LinearRegression`</span> model from <span class="in">`sklearn`</span></span>
+<span id="cb35-223"><a href="#cb35-223" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-224"><a href="#cb35-224" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
+<span id="cb35-225"><a href="#cb35-225" aria-hidden="true" tabindex="-1"></a><span class="in">    from sklearn.linear_model import LinearRegression</span></span>
+<span id="cb35-226"><a href="#cb35-226" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
+<span id="cb35-227"><a href="#cb35-227" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-228"><a href="#cb35-228" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Create a model object. This generates a new instance of the model class. You can think of it as making a new "copy" of a standard "template" for a model. In code, this looks like:</span>
+<span id="cb35-229"><a href="#cb35-229" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-230"><a href="#cb35-230" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
+<span id="cb35-231"><a href="#cb35-231" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model = LinearRegression()</span></span>
+<span id="cb35-232"><a href="#cb35-232" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
+<span id="cb35-233"><a href="#cb35-233" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-234"><a href="#cb35-234" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb35-235"><a href="#cb35-235" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Fit the model to the <span class="in">`X`</span> design matrix and <span class="in">`Y`</span> target vector. This calculates the optimal model parameters "behind the scenes" without us explicitly working through the calculations ourselves. The fitted parameters are then stored within the model for use in future predictions:</span>
+<span id="cb35-236"><a href="#cb35-236" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-237"><a href="#cb35-237" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
+<span id="cb35-238"><a href="#cb35-238" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model.fit(X, Y)</span></span>
+<span id="cb35-239"><a href="#cb35-239" aria-hidden="true" tabindex="-1"></a><span class="in">     ```</span></span>
 <span id="cb35-240"><a href="#cb35-240" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-241"><a href="#cb35-241" aria-hidden="true" tabindex="-1"></a>$$\text{bill depth} = \theta_0 + \theta_1 \text{flipper length} + \theta_2 \text{body mass}$$</span>
-<span id="cb35-242"><a href="#cb35-242" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-243"><a href="#cb35-243" aria-hidden="true" tabindex="-1"></a>We repeat this three-step process by intializing a new model object, then calling <span class="in">`.fit`</span> and <span class="in">`.predict`</span> as before.</span>
-<span id="cb35-244"><a href="#cb35-244" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-247"><a href="#cb35-247" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-248"><a href="#cb35-248" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 1: initialize LinearRegression model</span></span>
-<span id="cb35-249"><a href="#cb35-249" aria-hidden="true" tabindex="-1"></a>two_feature_model <span class="op">=</span> LinearRegression()</span>
-<span id="cb35-250"><a href="#cb35-250" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-251"><a href="#cb35-251" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 2: fit the model</span></span>
-<span id="cb35-252"><a href="#cb35-252" aria-hidden="true" tabindex="-1"></a>X_two_features <span class="op">=</span> penguins[[<span class="st">"flipper_length_mm"</span>, <span class="st">"body_mass_g"</span>]]</span>
-<span id="cb35-253"><a href="#cb35-253" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[<span class="st">"bill_depth_mm"</span>]</span>
-<span id="cb35-254"><a href="#cb35-254" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-255"><a href="#cb35-255" aria-hidden="true" tabindex="-1"></a>two_feature_model.fit(X_two_features, Y)</span>
+<span id="cb35-241"><a href="#cb35-241" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb35-242"><a href="#cb35-242" aria-hidden="true" tabindex="-1"></a><span class="ss">4. </span>Use the fitted model to make predictions on the <span class="in">`X`</span> input data using <span class="in">`.predict`</span>. </span>
+<span id="cb35-243"><a href="#cb35-243" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-244"><a href="#cb35-244" aria-hidden="true" tabindex="-1"></a>    <span class="in">```</span></span>
+<span id="cb35-245"><a href="#cb35-245" aria-hidden="true" tabindex="-1"></a><span class="in">    my_model.predict(X)</span></span>
+<span id="cb35-246"><a href="#cb35-246" aria-hidden="true" tabindex="-1"></a><span class="in">    ```</span></span>
+<span id="cb35-247"><a href="#cb35-247" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-248"><a href="#cb35-248" aria-hidden="true" tabindex="-1"></a>To extract the fitted parameters, we can use:</span>
+<span id="cb35-249"><a href="#cb35-249" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-250"><a href="#cb35-250" aria-hidden="true" tabindex="-1"></a>  <span class="in">```</span></span>
+<span id="cb35-251"><a href="#cb35-251" aria-hidden="true" tabindex="-1"></a><span class="in">  my_model.coef_</span></span>
+<span id="cb35-252"><a href="#cb35-252" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-253"><a href="#cb35-253" aria-hidden="true" tabindex="-1"></a><span class="in">  my_model.intercept_</span></span>
+<span id="cb35-254"><a href="#cb35-254" aria-hidden="true" tabindex="-1"></a><span class="in">  ```</span></span>
+<span id="cb35-255"><a href="#cb35-255" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb35-256"><a href="#cb35-256" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-257"><a href="#cb35-257" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 3: make predictions</span></span>
-<span id="cb35-258"><a href="#cb35-258" aria-hidden="true" tabindex="-1"></a>Y_hat_two_features <span class="op">=</span> two_feature_model.predict(X_two_features)</span>
-<span id="cb35-259"><a href="#cb35-259" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-260"><a href="#cb35-260" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"The RMSE of the model is </span><span class="sc">{</span>np<span class="sc">.</span>sqrt(np.mean((Y<span class="op">-</span>Y_hat_two_features)<span class="op">**</span><span class="dv">2</span>))<span class="sc">}</span><span class="ss">"</span>)</span>
-<span id="cb35-261"><a href="#cb35-261" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-257"><a href="#cb35-257" aria-hidden="true" tabindex="-1"></a>Let's put this into action with our multiple regression task!</span>
+<span id="cb35-258"><a href="#cb35-258" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-259"><a href="#cb35-259" aria-hidden="true" tabindex="-1"></a>**1. Initialize an instance of the model class**</span>
+<span id="cb35-260"><a href="#cb35-260" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-261"><a href="#cb35-261" aria-hidden="true" tabindex="-1"></a><span class="in">`sklearn`</span> stores "templates" of useful models for machine learning. We begin the modeling process by making a "copy" of one of these templates for our own use. Model initialization looks like <span class="in">`ModelClass()`</span>, where <span class="in">`ModelClass`</span> is the type of model we wish to create.</span>
 <span id="cb35-262"><a href="#cb35-262" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-263"><a href="#cb35-263" aria-hidden="true" tabindex="-1"></a>We can also see that we obtain the same predictions using <span class="in">`sklearn`</span> as we did when applying the ordinary least squares formula before! </span>
+<span id="cb35-263"><a href="#cb35-263" aria-hidden="true" tabindex="-1"></a>For now, let's create a linear regression model using <span class="in">`LinearRegression`</span>. </span>
 <span id="cb35-264"><a href="#cb35-264" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-267"><a href="#cb35-267" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-268"><a href="#cb35-268" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
-<span id="cb35-269"><a href="#cb35-269" aria-hidden="true" tabindex="-1"></a>pd.DataFrame({<span class="st">"Y_hat from OLS"</span>:np.squeeze(Y_hat), <span class="st">"Y_hat from sklearn"</span>:Y_hat_two_features}).head()</span>
-<span id="cb35-270"><a href="#cb35-270" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-271"><a href="#cb35-271" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-272"><a href="#cb35-272" aria-hidden="true" tabindex="-1"></a><span class="fu">## Gradient Descent </span></span>
-<span id="cb35-273"><a href="#cb35-273" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-274"><a href="#cb35-274" aria-hidden="true" tabindex="-1"></a>At this point, we've grown quite familiar with the process of choosing a model and a corresponding loss function and optimizing parameters by choosing the values of $\theta$ that minimize the loss function. So far, we've optimized $\theta$ by</span>
-<span id="cb35-275"><a href="#cb35-275" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-276"><a href="#cb35-276" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Using calculus to take the derivative of the loss function with respect to $\theta$, setting it equal to 0, and solving for $\theta$.</span>
-<span id="cb35-277"><a href="#cb35-277" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Using the geometric argument of orthogonality to derive the OLS solution $\hat{\theta} = (\mathbb{X}^T \mathbb{X})^{-1}\mathbb{X}^T \mathbb{Y}$.</span>
+<span id="cb35-265"><a href="#cb35-265" aria-hidden="true" tabindex="-1"></a><span class="in">`my_model`</span> is now an instance of the <span class="in">`LinearRegression`</span> class. You can think of it as the "idea" of a linear regression model. We haven't trained it yet, so it doesn't know any model parameters and cannot be used to make predictions. In fact, we haven't even told it what data to use for modeling! It simply waits for further instructions.</span>
+<span id="cb35-266"><a href="#cb35-266" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-269"><a href="#cb35-269" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-270"><a href="#cb35-270" aria-hidden="true" tabindex="-1"></a>my_model <span class="op">=</span> LinearRegression()</span>
+<span id="cb35-271"><a href="#cb35-271" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-272"><a href="#cb35-272" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-273"><a href="#cb35-273" aria-hidden="true" tabindex="-1"></a>**2. Train the model using `.fit`**</span>
+<span id="cb35-274"><a href="#cb35-274" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-275"><a href="#cb35-275" aria-hidden="true" tabindex="-1"></a>Before the model can make predictions, we will need to fit it to our training data. When we fit the model, <span class="in">`sklearn`</span> will solve the least squares problem behind the scenes to determine the optimal model parameters. It will then save these model parameters to our model instance for future use. </span>
+<span id="cb35-276"><a href="#cb35-276" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-277"><a href="#cb35-277" aria-hidden="true" tabindex="-1"></a>All <span class="in">`sklearn`</span> model classes include a <span class="in">`.fit`</span> method, which is used to fit the model. It takes in two inputs: the design matrix, <span class="in">`X`</span>, and the target variable, <span class="in">`Y`</span>. </span>
 <span id="cb35-278"><a href="#cb35-278" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-279"><a href="#cb35-279" aria-hidden="true" tabindex="-1"></a>One thing to note, however, is that the techniques we used above can only be applied if we make some big assumptions. For the calculus approach, we assumed that the loss function was differentiable at all points and that we could algebraically solve for the zero points of the derivative; for the geometric approach, OLS *only* applies when using a linear model with MSE loss. What happens when we have more complex models with different, more complex loss functions? The techniques we've learned so far will not work, so we need a new optimization technique: **gradient descent**. </span>
+<span id="cb35-279"><a href="#cb35-279" aria-hidden="true" tabindex="-1"></a>Let's start by fitting a model with just one feature: the flipper length. We create a design matrix <span class="in">`X`</span> by pulling out the <span class="in">`"flipper_length_mm"`</span> column from the <span class="in">`DataFrame`</span>. </span>
 <span id="cb35-280"><a href="#cb35-280" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-281"><a href="#cb35-281" aria-hidden="true" tabindex="-1"></a><span class="at">&gt; **BIG IDEA**: use an iterative algorithm to numerically compute the minimum of the loss.</span></span>
-<span id="cb35-282"><a href="#cb35-282" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-283"><a href="#cb35-283" aria-hidden="true" tabindex="-1"></a><span class="fu">### Minimizing an Arbitrary 1D Function</span></span>
-<span id="cb35-284"><a href="#cb35-284" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-285"><a href="#cb35-285" aria-hidden="true" tabindex="-1"></a>Let's consider an arbitrary function. Our goal is to find the value of $x$ that minimizes this function.</span>
-<span id="cb35-286"><a href="#cb35-286" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-289"><a href="#cb35-289" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-290"><a href="#cb35-290" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> arbitrary(x):</span>
-<span id="cb35-291"><a href="#cb35-291" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (x<span class="op">**</span><span class="dv">4</span> <span class="op">-</span> <span class="dv">15</span><span class="op">*</span>x<span class="op">**</span><span class="dv">3</span> <span class="op">+</span> <span class="dv">80</span><span class="op">*</span>x<span class="op">**</span><span class="dv">2</span> <span class="op">-</span> <span class="dv">180</span><span class="op">*</span>x <span class="op">+</span> <span class="dv">144</span>)<span class="op">/</span><span class="dv">10</span></span>
-<span id="cb35-292"><a href="#cb35-292" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-293"><a href="#cb35-293" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-294"><a href="#cb35-294" aria-hidden="true" tabindex="-1"></a>&lt;img src="images/arbitrary.png" alt='arbitrary' width='600'&gt;</span>
-<span id="cb35-295"><a href="#cb35-295" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-296"><a href="#cb35-296" aria-hidden="true" tabindex="-1"></a><span class="fu">#### The Naive Approach: Guess and Check</span></span>
-<span id="cb35-297"><a href="#cb35-297" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-298"><a href="#cb35-298" aria-hidden="true" tabindex="-1"></a>Above, we saw that the minimum is somewhere around 5.3. Let's see if we can figure out how to find the exact minimum algorithmically from scratch. One very slow (and terrible) way would be manual guess-and-check.</span>
-<span id="cb35-299"><a href="#cb35-299" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-302"><a href="#cb35-302" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-303"><a href="#cb35-303" aria-hidden="true" tabindex="-1"></a>arbitrary(<span class="dv">6</span>)</span>
-<span id="cb35-304"><a href="#cb35-304" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-305"><a href="#cb35-305" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-306"><a href="#cb35-306" aria-hidden="true" tabindex="-1"></a>A somewhat better (but still slow) approach is to use brute force to try out a bunch of x values and return the one that yields the lowest loss.</span>
-<span id="cb35-307"><a href="#cb35-307" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-310"><a href="#cb35-310" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-311"><a href="#cb35-311" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simple_minimize(f, xs):</span>
-<span id="cb35-312"><a href="#cb35-312" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Takes in a function f and a set of values xs. </span></span>
-<span id="cb35-313"><a href="#cb35-313" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Calculates the value of the function f at all values x in xs</span></span>
-<span id="cb35-314"><a href="#cb35-314" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Takes the minimum value of f(x) and returns the corresponding value x </span></span>
-<span id="cb35-315"><a href="#cb35-315" aria-hidden="true" tabindex="-1"></a>    y <span class="op">=</span> [f(x) <span class="cf">for</span> x <span class="kw">in</span> xs]  </span>
-<span id="cb35-316"><a href="#cb35-316" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> xs[np.argmin(y)]</span>
+<span id="cb35-283"><a href="#cb35-283" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-284"><a href="#cb35-284" aria-hidden="true" tabindex="-1"></a><span class="co"># .fit expects a 2D data design matrix, so we use double brackets to extract a DataFrame</span></span>
+<span id="cb35-285"><a href="#cb35-285" aria-hidden="true" tabindex="-1"></a>X <span class="op">=</span> penguins[[<span class="st">"flipper_length_mm"</span>]]</span>
+<span id="cb35-286"><a href="#cb35-286" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[<span class="st">"bill_depth_mm"</span>]</span>
+<span id="cb35-287"><a href="#cb35-287" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-288"><a href="#cb35-288" aria-hidden="true" tabindex="-1"></a>my_model.fit(X, Y)</span>
+<span id="cb35-289"><a href="#cb35-289" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-290"><a href="#cb35-290" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-291"><a href="#cb35-291" aria-hidden="true" tabindex="-1"></a>Notice that we use **double brackets** to extract this column. Why double brackets instead of just single brackets? The `.fit` method, by default, expects to receive **2-dimensional** data – some kind of data that includes both rows and columns. Writing <span class="in">`penguins["flipper_length_mm"]`</span> would return a 1D <span class="in">`Series`</span>, causing <span class="in">`sklearn`</span> to error. We avoid this by writing <span class="in">`penguins[["flipper_length_mm"]]`</span> to produce a 2D <span class="in">`DataFrame`</span>. </span>
+<span id="cb35-292"><a href="#cb35-292" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-293"><a href="#cb35-293" aria-hidden="true" tabindex="-1"></a>And in just three lines of code, our model has computed the optimal model parameters! Our single-feature model takes the form:</span>
+<span id="cb35-294"><a href="#cb35-294" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-295"><a href="#cb35-295" aria-hidden="true" tabindex="-1"></a>$$\text{bill depth} = \theta_0 + \theta_1 \text{flipper length}$$</span>
+<span id="cb35-296"><a href="#cb35-296" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-297"><a href="#cb35-297" aria-hidden="true" tabindex="-1"></a>Note that <span class="in">`LinearRegression`</span> will automatically include an intercept term. </span>
+<span id="cb35-298"><a href="#cb35-298" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-299"><a href="#cb35-299" aria-hidden="true" tabindex="-1"></a>The fitted model parameters are stored as attributes of the model instance. <span class="in">`my_model.intercept_`</span> will return the value of $\hat{\theta}_0$ as a scalar. `my_model.coef_` will return all values $\hat{\theta}_1, </span>
+<span id="cb35-300"><a href="#cb35-300" aria-hidden="true" tabindex="-1"></a>\hat{\theta}_2, ...$ in an array. Because our model only contains one feature, we see just the value of $\hat{\theta}_1$ in the cell below.</span>
+<span id="cb35-301"><a href="#cb35-301" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-304"><a href="#cb35-304" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-305"><a href="#cb35-305" aria-hidden="true" tabindex="-1"></a><span class="co"># The intercept term, theta_0</span></span>
+<span id="cb35-306"><a href="#cb35-306" aria-hidden="true" tabindex="-1"></a>my_model.intercept_</span>
+<span id="cb35-307"><a href="#cb35-307" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-308"><a href="#cb35-308" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-311"><a href="#cb35-311" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-312"><a href="#cb35-312" aria-hidden="true" tabindex="-1"></a><span class="co"># All parameters theta_1, ..., theta_p</span></span>
+<span id="cb35-313"><a href="#cb35-313" aria-hidden="true" tabindex="-1"></a>my_model.coef_</span>
+<span id="cb35-314"><a href="#cb35-314" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-315"><a href="#cb35-315" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-316"><a href="#cb35-316" aria-hidden="true" tabindex="-1"></a>**3. Use the fitted model to make predictions**</span>
 <span id="cb35-317"><a href="#cb35-317" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-318"><a href="#cb35-318" aria-hidden="true" tabindex="-1"></a>guesses <span class="op">=</span> [<span class="fl">5.3</span>, <span class="fl">5.31</span>, <span class="fl">5.32</span>, <span class="fl">5.33</span>, <span class="fl">5.34</span>, <span class="fl">5.35</span>]</span>
-<span id="cb35-319"><a href="#cb35-319" aria-hidden="true" tabindex="-1"></a>simple_minimize(arbitrary, guesses)</span>
-<span id="cb35-320"><a href="#cb35-320" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-318"><a href="#cb35-318" aria-hidden="true" tabindex="-1"></a>Now that the model has been trained, we can use it to make predictions! To do so, we use the <span class="in">`.predict`</span> method. <span class="in">`.predict`</span> takes in one argument: the design matrix that should be used to generate predictions. To understand how the model performs on the training set, we would pass in the training data. Alternatively, to make predictions on unseen data, we would pass in a new dataset that wasn't used to train the model.</span>
+<span id="cb35-319"><a href="#cb35-319" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-320"><a href="#cb35-320" aria-hidden="true" tabindex="-1"></a>Below, we call <span class="in">`.predict`</span> to generate model predictions on the original training data. As before, we use double brackets to ensure that we extract 2-dimensional data.</span>
 <span id="cb35-321"><a href="#cb35-321" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-322"><a href="#cb35-322" aria-hidden="true" tabindex="-1"></a>This process is essentially the same as before where we made a graphical plot, it's just that we're only looking at 20 selected points.</span>
-<span id="cb35-323"><a href="#cb35-323" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-326"><a href="#cb35-326" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-327"><a href="#cb35-327" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
-<span id="cb35-328"><a href="#cb35-328" aria-hidden="true" tabindex="-1"></a>xs <span class="op">=</span> np.linspace(<span class="dv">1</span>, <span class="dv">7</span>, <span class="dv">200</span>)</span>
-<span id="cb35-329"><a href="#cb35-329" aria-hidden="true" tabindex="-1"></a>sparse_xs <span class="op">=</span> np.linspace(<span class="dv">1</span>, <span class="dv">7</span>, <span class="dv">5</span>)</span>
-<span id="cb35-330"><a href="#cb35-330" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-331"><a href="#cb35-331" aria-hidden="true" tabindex="-1"></a>ys <span class="op">=</span> arbitrary(xs)</span>
-<span id="cb35-332"><a href="#cb35-332" aria-hidden="true" tabindex="-1"></a>sparse_ys <span class="op">=</span> arbitrary(sparse_xs)</span>
+<span id="cb35-324"><a href="#cb35-324" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-325"><a href="#cb35-325" aria-hidden="true" tabindex="-1"></a>Y_hat_one_feature <span class="op">=</span> my_model.predict(penguins[[<span class="st">"flipper_length_mm"</span>]])</span>
+<span id="cb35-326"><a href="#cb35-326" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-327"><a href="#cb35-327" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"The RMSE of the model is </span><span class="sc">{</span>np<span class="sc">.</span>sqrt(np.mean((Y<span class="op">-</span>Y_hat_one_feature)<span class="op">**</span><span class="dv">2</span>))<span class="sc">}</span><span class="ss">"</span>)</span>
+<span id="cb35-328"><a href="#cb35-328" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-329"><a href="#cb35-329" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-330"><a href="#cb35-330" aria-hidden="true" tabindex="-1"></a>What if we wanted a model with two features? </span>
+<span id="cb35-331"><a href="#cb35-331" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-332"><a href="#cb35-332" aria-hidden="true" tabindex="-1"></a>$$\text{bill depth} = \theta_0 + \theta_1 \text{flipper length} + \theta_2 \text{body mass}$$</span>
 <span id="cb35-333"><a href="#cb35-333" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-334"><a href="#cb35-334" aria-hidden="true" tabindex="-1"></a>fig <span class="op">=</span> px.line(x <span class="op">=</span> xs, y <span class="op">=</span> arbitrary(xs))</span>
-<span id="cb35-335"><a href="#cb35-335" aria-hidden="true" tabindex="-1"></a>fig.add_scatter(x <span class="op">=</span> sparse_xs, y <span class="op">=</span> arbitrary(sparse_xs), mode <span class="op">=</span> <span class="st">"markers"</span>)</span>
-<span id="cb35-336"><a href="#cb35-336" aria-hidden="true" tabindex="-1"></a>fig.update_layout(showlegend<span class="op">=</span> <span class="va">False</span>)</span>
-<span id="cb35-337"><a href="#cb35-337" aria-hidden="true" tabindex="-1"></a>fig.update_layout(autosize<span class="op">=</span><span class="va">False</span>, width<span class="op">=</span><span class="dv">800</span>, height<span class="op">=</span><span class="dv">600</span>)</span>
-<span id="cb35-338"><a href="#cb35-338" aria-hidden="true" tabindex="-1"></a>fig.show()</span>
-<span id="cb35-339"><a href="#cb35-339" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-340"><a href="#cb35-340" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-341"><a href="#cb35-341" aria-hidden="true" tabindex="-1"></a>This basic approach suffers from three major flaws:</span>
-<span id="cb35-342"><a href="#cb35-342" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-343"><a href="#cb35-343" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>If the minimum is outside our range of guesses, the answer will be completely wrong.</span>
-<span id="cb35-344"><a href="#cb35-344" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Even if our range of guesses is correct, if the guesses are too coarse, our answer will be inaccurate.</span>
-<span id="cb35-345"><a href="#cb35-345" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>It is *very* computationally inefficient, considering potentially vast numbers of guesses that are useless.</span>
-<span id="cb35-346"><a href="#cb35-346" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-347"><a href="#cb35-347" aria-hidden="true" tabindex="-1"></a><span class="fu">#### `Scipy.optimize.minimize`</span></span>
-<span id="cb35-348"><a href="#cb35-348" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-349"><a href="#cb35-349" aria-hidden="true" tabindex="-1"></a>One way to minimize this mathematical function is to use the <span class="in">`scipy.optimize.minimize`</span> function. It takes a function and a starting guess and tries to find the minimum.</span>
+<span id="cb35-334"><a href="#cb35-334" aria-hidden="true" tabindex="-1"></a>We repeat this three-step process by initializing a new model object, then calling <span class="in">`.fit`</span> and <span class="in">`.predict`</span> as before.</span>
+<span id="cb35-335"><a href="#cb35-335" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-338"><a href="#cb35-338" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-339"><a href="#cb35-339" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 1: initialize LinearRegression model</span></span>
+<span id="cb35-340"><a href="#cb35-340" aria-hidden="true" tabindex="-1"></a>two_feature_model <span class="op">=</span> LinearRegression()</span>
+<span id="cb35-341"><a href="#cb35-341" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-342"><a href="#cb35-342" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 2: fit the model</span></span>
+<span id="cb35-343"><a href="#cb35-343" aria-hidden="true" tabindex="-1"></a>X_two_features <span class="op">=</span> penguins[[<span class="st">"flipper_length_mm"</span>, <span class="st">"body_mass_g"</span>]]</span>
+<span id="cb35-344"><a href="#cb35-344" aria-hidden="true" tabindex="-1"></a>Y <span class="op">=</span> penguins[<span class="st">"bill_depth_mm"</span>]</span>
+<span id="cb35-345"><a href="#cb35-345" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-346"><a href="#cb35-346" aria-hidden="true" tabindex="-1"></a>two_feature_model.fit(X_two_features, Y)</span>
+<span id="cb35-347"><a href="#cb35-347" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-348"><a href="#cb35-348" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 3: make predictions</span></span>
+<span id="cb35-349"><a href="#cb35-349" aria-hidden="true" tabindex="-1"></a>Y_hat_two_features <span class="op">=</span> two_feature_model.predict(X_two_features)</span>
 <span id="cb35-350"><a href="#cb35-350" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-353"><a href="#cb35-353" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-354"><a href="#cb35-354" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> scipy.optimize <span class="im">import</span> minimize</span>
+<span id="cb35-351"><a href="#cb35-351" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"The RMSE of the model is </span><span class="sc">{</span>np<span class="sc">.</span>sqrt(np.mean((Y<span class="op">-</span>Y_hat_two_features)<span class="op">**</span><span class="dv">2</span>))<span class="sc">}</span><span class="ss">"</span>)</span>
+<span id="cb35-352"><a href="#cb35-352" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-353"><a href="#cb35-353" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-354"><a href="#cb35-354" aria-hidden="true" tabindex="-1"></a>We can also see that we obtain the same predictions using <span class="in">`sklearn`</span> as we did when applying the ordinary least squares formula before! </span>
 <span id="cb35-355"><a href="#cb35-355" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-356"><a href="#cb35-356" aria-hidden="true" tabindex="-1"></a><span class="co"># takes a function f and a starting point x0 and returns a readout </span></span>
-<span id="cb35-357"><a href="#cb35-357" aria-hidden="true" tabindex="-1"></a><span class="co"># with the optimal input value of x which minimizes f</span></span>
-<span id="cb35-358"><a href="#cb35-358" aria-hidden="true" tabindex="-1"></a>minimize(arbitrary, x0 <span class="op">=</span> <span class="fl">3.5</span>)</span>
-<span id="cb35-359"><a href="#cb35-359" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-360"><a href="#cb35-360" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-361"><a href="#cb35-361" aria-hidden="true" tabindex="-1"></a><span class="in">`scipy.optimize.minimize`</span> is great. It may also seem a bit magical. How could you write a function that can find the minimum of any mathematical function? There are a number of ways to do this, which we'll explore in today's lecture, eventually arriving at the important idea of **gradient descent**, which is the principle that <span class="in">`scipy.optimize.minimize`</span> uses.</span>
+<span id="cb35-358"><a href="#cb35-358" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-359"><a href="#cb35-359" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
+<span id="cb35-360"><a href="#cb35-360" aria-hidden="true" tabindex="-1"></a>pd.DataFrame({<span class="st">"Y_hat from OLS"</span>:np.squeeze(Y_hat), <span class="st">"Y_hat from sklearn"</span>:Y_hat_two_features}).head()</span>
+<span id="cb35-361"><a href="#cb35-361" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
 <span id="cb35-362"><a href="#cb35-362" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-363"><a href="#cb35-363" aria-hidden="true" tabindex="-1"></a>It turns out that under the hood, the <span class="in">`fit`</span> method for <span class="in">`LinearRegression`</span> models uses gradient descent. Gradient descent is also how much of machine learning works, including even advanced neural network models. </span>
+<span id="cb35-363"><a href="#cb35-363" aria-hidden="true" tabindex="-1"></a><span class="fu">## Gradient Descent </span></span>
 <span id="cb35-364"><a href="#cb35-364" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-365"><a href="#cb35-365" aria-hidden="true" tabindex="-1"></a>In Data 100, the gradient descent process will usually be invisible to us, hidden beneath an abstraction layer. However, to be good data scientists, it's important that we know the underlying principles that optimization functions harness to find optimal parameters.</span>
+<span id="cb35-365"><a href="#cb35-365" aria-hidden="true" tabindex="-1"></a>At this point, we've grown quite familiar with the process of choosing a model and a corresponding loss function and optimizing parameters by choosing the values of $\theta$ that minimize the loss function. So far, we've optimized $\theta$ by</span>
 <span id="cb35-366"><a href="#cb35-366" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-367"><a href="#cb35-367" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-368"><a href="#cb35-368" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Digging into Gradient Descent</span></span>
-<span id="cb35-369"><a href="#cb35-369" aria-hidden="true" tabindex="-1"></a>Looking at the function across this domain, it is clear that the function's minimum value occurs around $\theta = 5.3$. Let's pretend for a moment that we *couldn't* see the full view of the cost function. How would we guess the value of $\theta$ that minimizes the function? </span>
-<span id="cb35-370"><a href="#cb35-370" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-371"><a href="#cb35-371" aria-hidden="true" tabindex="-1"></a>It turns out that the first derivative of the function can give us a clue. In the graph below, the function and its derivative are plotted, with points where the derivative is equal to 0 plotted in light green.</span>
-<span id="cb35-372"><a href="#cb35-372" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-375"><a href="#cb35-375" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-376"><a href="#cb35-376" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
-<span id="cb35-377"><a href="#cb35-377" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.graph_objects <span class="im">as</span> go</span>
-<span id="cb35-378"><a href="#cb35-378" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-379"><a href="#cb35-379" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> derivative_arbitrary(x):</span>
-<span id="cb35-380"><a href="#cb35-380" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (<span class="dv">4</span><span class="op">*</span>x<span class="op">**</span><span class="dv">3</span> <span class="op">-</span> <span class="dv">45</span><span class="op">*</span>x<span class="op">**</span><span class="dv">2</span> <span class="op">+</span> <span class="dv">160</span><span class="op">*</span>x <span class="op">-</span> <span class="dv">180</span>)<span class="op">/</span><span class="dv">10</span></span>
-<span id="cb35-381"><a href="#cb35-381" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-382"><a href="#cb35-382" aria-hidden="true" tabindex="-1"></a>fig <span class="op">=</span> go.Figure()</span>
-<span id="cb35-383"><a href="#cb35-383" aria-hidden="true" tabindex="-1"></a>roots <span class="op">=</span> np.array([<span class="fl">2.3927</span>, <span class="fl">3.5309</span>, <span class="fl">5.3263</span>])</span>
+<span id="cb35-367"><a href="#cb35-367" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Using calculus to take the derivative of the loss function with respect to $\theta$, setting it equal to 0, and solving for $\theta$.</span>
+<span id="cb35-368"><a href="#cb35-368" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Using the geometric argument of orthogonality to derive the OLS solution $\hat{\theta} = (\mathbb{X}^T \mathbb{X})^{-1}\mathbb{X}^T \mathbb{Y}$.</span>
+<span id="cb35-369"><a href="#cb35-369" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-370"><a href="#cb35-370" aria-hidden="true" tabindex="-1"></a>One thing to note, however, is that the techniques we used above can only be applied if we make some big assumptions. For the calculus approach, we assumed that the loss function was differentiable at all points and that we could algebraically solve for the zero points of the derivative; for the geometric approach, OLS *only* applies when using a linear model with MSE loss. What happens when we have more complex models with different, more complex loss functions? The techniques we've learned so far will not work, so we need a new optimization technique: **gradient descent**. </span>
+<span id="cb35-371"><a href="#cb35-371" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-372"><a href="#cb35-372" aria-hidden="true" tabindex="-1"></a><span class="at">&gt; **BIG IDEA**: use an iterative algorithm to numerically compute the minimum of the loss.</span></span>
+<span id="cb35-373"><a href="#cb35-373" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-374"><a href="#cb35-374" aria-hidden="true" tabindex="-1"></a><span class="fu">### Minimizing an Arbitrary 1D Function</span></span>
+<span id="cb35-375"><a href="#cb35-375" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-376"><a href="#cb35-376" aria-hidden="true" tabindex="-1"></a>Let's consider an arbitrary function. Our goal is to find the value of $x$ that minimizes this function.</span>
+<span id="cb35-377"><a href="#cb35-377" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-380"><a href="#cb35-380" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-381"><a href="#cb35-381" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> arbitrary(x):</span>
+<span id="cb35-382"><a href="#cb35-382" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (x<span class="op">**</span><span class="dv">4</span> <span class="op">-</span> <span class="dv">15</span><span class="op">*</span>x<span class="op">**</span><span class="dv">3</span> <span class="op">+</span> <span class="dv">80</span><span class="op">*</span>x<span class="op">**</span><span class="dv">2</span> <span class="op">-</span> <span class="dv">180</span><span class="op">*</span>x <span class="op">+</span> <span class="dv">144</span>)<span class="op">/</span><span class="dv">10</span></span>
+<span id="cb35-383"><a href="#cb35-383" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
 <span id="cb35-384"><a href="#cb35-384" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-385"><a href="#cb35-385" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> xs, y <span class="op">=</span> arbitrary(xs), </span>
-<span id="cb35-386"><a href="#cb35-386" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"lines"</span>, name <span class="op">=</span> <span class="st">"f"</span>))</span>
-<span id="cb35-387"><a href="#cb35-387" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> xs, y <span class="op">=</span> derivative_arbitrary(xs), </span>
-<span id="cb35-388"><a href="#cb35-388" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"lines"</span>, name <span class="op">=</span> <span class="st">"df"</span>, line <span class="op">=</span> {<span class="st">"dash"</span>: <span class="st">"dash"</span>}))</span>
-<span id="cb35-389"><a href="#cb35-389" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> np.array(roots), y <span class="op">=</span> <span class="dv">0</span><span class="op">*</span>roots, </span>
-<span id="cb35-390"><a href="#cb35-390" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"markers"</span>, name <span class="op">=</span> <span class="st">"df = zero"</span>, marker_size <span class="op">=</span> <span class="dv">12</span>))</span>
-<span id="cb35-391"><a href="#cb35-391" aria-hidden="true" tabindex="-1"></a>fig.update_layout(font_size <span class="op">=</span> <span class="dv">20</span>, yaxis_range<span class="op">=</span>[<span class="op">-</span><span class="dv">1</span>, <span class="dv">3</span>])</span>
-<span id="cb35-392"><a href="#cb35-392" aria-hidden="true" tabindex="-1"></a>fig.update_layout(autosize<span class="op">=</span><span class="va">False</span>, width<span class="op">=</span><span class="dv">800</span>, height<span class="op">=</span><span class="dv">600</span>)</span>
-<span id="cb35-393"><a href="#cb35-393" aria-hidden="true" tabindex="-1"></a>fig.show()</span>
-<span id="cb35-394"><a href="#cb35-394" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-395"><a href="#cb35-395" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-396"><a href="#cb35-396" aria-hidden="true" tabindex="-1"></a>In the plots below, the line indicates the value of the derivative of each value of $\theta$. The derivative is negative where it is red and positive where it is green.</span>
-<span id="cb35-397"><a href="#cb35-397" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-385"><a href="#cb35-385" aria-hidden="true" tabindex="-1"></a>&lt;img src="images/arbitrary.png" alt='arbitrary' width='600'&gt;</span>
+<span id="cb35-386"><a href="#cb35-386" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-387"><a href="#cb35-387" aria-hidden="true" tabindex="-1"></a><span class="fu">#### The Naive Approach: Guess and Check</span></span>
+<span id="cb35-388"><a href="#cb35-388" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-389"><a href="#cb35-389" aria-hidden="true" tabindex="-1"></a>Above, we saw that the minimum is somewhere around 5.3. Let's see if we can figure out how to find the exact minimum algorithmically from scratch. One very slow (and terrible) way would be manual guess-and-check.</span>
+<span id="cb35-390"><a href="#cb35-390" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-393"><a href="#cb35-393" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-394"><a href="#cb35-394" aria-hidden="true" tabindex="-1"></a>arbitrary(<span class="dv">6</span>)</span>
+<span id="cb35-395"><a href="#cb35-395" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-396"><a href="#cb35-396" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-397"><a href="#cb35-397" aria-hidden="true" tabindex="-1"></a>A somewhat better (but still slow) approach is to use brute force to try out a bunch of x values and return the one that yields the lowest loss.</span>
 <span id="cb35-398"><a href="#cb35-398" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-399"><a href="#cb35-399" aria-hidden="true" tabindex="-1"></a>Say we make a guess for the minimizing value of $\theta$. Remember that we read plots from left to right, and assume that our starting $\theta$ value is to the left of the optimal $\hat{\theta}$. If the guess "undershoots" the true minimizing value – our guess for $\theta$ is lower than the value of the $\hat{\theta}$ that minimizes the function – the derivative will be **negative**. This means that if we increase $\theta$ (move further to the right), then we **can decrease** our loss function further. If this guess "overshoots" the true minimizing value, the derivative will be positive, implying the converse.</span>
-<span id="cb35-400"><a href="#cb35-400" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-401"><a href="#cb35-401" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-402"><a href="#cb35-402" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-403"><a href="#cb35-403" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-404"><a href="#cb35-404" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/step.png" alt='step' width='600'&gt;</span>
-<span id="cb35-405"><a href="#cb35-405" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-406"><a href="#cb35-406" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-407"><a href="#cb35-407" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-408"><a href="#cb35-408" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-409"><a href="#cb35-409" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-410"><a href="#cb35-410" aria-hidden="true" tabindex="-1"></a>We can use this pattern to help formulate our next guess for the optimal $\hat{\theta}$. Consider the case where we've undershot $\theta$ by guessing too low of a value. We'll want our next guess to be greater in value than our previous guess – that is, we want to shift our guess to the right. You can think of this as following the slope "downhill" to the function's minimum value.</span>
-<span id="cb35-411"><a href="#cb35-411" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-412"><a href="#cb35-412" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-413"><a href="#cb35-413" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-414"><a href="#cb35-414" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-415"><a href="#cb35-415" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/neg_step.png" alt='neg_step' width='600'&gt;</span>
-<span id="cb35-416"><a href="#cb35-416" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-417"><a href="#cb35-417" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-418"><a href="#cb35-418" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-419"><a href="#cb35-419" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-420"><a href="#cb35-420" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-421"><a href="#cb35-421" aria-hidden="true" tabindex="-1"></a>If we've overshot $\hat{\theta}$ by guessing too high of a value, we'll want our next guess to be lower in value – we want to shift our guess for $\hat{\theta}$ to the left. </span>
-<span id="cb35-422"><a href="#cb35-422" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-423"><a href="#cb35-423" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-424"><a href="#cb35-424" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-425"><a href="#cb35-425" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-426"><a href="#cb35-426" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/pos_step.png" alt='pos_step' width='600'&gt;</span>
-<span id="cb35-427"><a href="#cb35-427" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-428"><a href="#cb35-428" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-429"><a href="#cb35-429" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-430"><a href="#cb35-430" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-401"><a href="#cb35-401" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-402"><a href="#cb35-402" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> simple_minimize(f, xs):</span>
+<span id="cb35-403"><a href="#cb35-403" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Takes in a function f and a set of values xs. </span></span>
+<span id="cb35-404"><a href="#cb35-404" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Calculates the value of the function f at all values x in xs</span></span>
+<span id="cb35-405"><a href="#cb35-405" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Takes the minimum value of f(x) and returns the corresponding value x </span></span>
+<span id="cb35-406"><a href="#cb35-406" aria-hidden="true" tabindex="-1"></a>    y <span class="op">=</span> [f(x) <span class="cf">for</span> x <span class="kw">in</span> xs]  </span>
+<span id="cb35-407"><a href="#cb35-407" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> xs[np.argmin(y)]</span>
+<span id="cb35-408"><a href="#cb35-408" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-409"><a href="#cb35-409" aria-hidden="true" tabindex="-1"></a>guesses <span class="op">=</span> [<span class="fl">5.3</span>, <span class="fl">5.31</span>, <span class="fl">5.32</span>, <span class="fl">5.33</span>, <span class="fl">5.34</span>, <span class="fl">5.35</span>]</span>
+<span id="cb35-410"><a href="#cb35-410" aria-hidden="true" tabindex="-1"></a>simple_minimize(arbitrary, guesses)</span>
+<span id="cb35-411"><a href="#cb35-411" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-412"><a href="#cb35-412" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-413"><a href="#cb35-413" aria-hidden="true" tabindex="-1"></a>This process is essentially the same as before, when we made a graphical plot; the only difference is that we're looking at just a small set of selected points.</span>
+<span id="cb35-414"><a href="#cb35-414" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-417"><a href="#cb35-417" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-418"><a href="#cb35-418" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
+<span id="cb35-419"><a href="#cb35-419" aria-hidden="true" tabindex="-1"></a>xs <span class="op">=</span> np.linspace(<span class="dv">1</span>, <span class="dv">7</span>, <span class="dv">200</span>)</span>
+<span id="cb35-420"><a href="#cb35-420" aria-hidden="true" tabindex="-1"></a>sparse_xs <span class="op">=</span> np.linspace(<span class="dv">1</span>, <span class="dv">7</span>, <span class="dv">5</span>)</span>
+<span id="cb35-421"><a href="#cb35-421" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-422"><a href="#cb35-422" aria-hidden="true" tabindex="-1"></a>ys <span class="op">=</span> arbitrary(xs)</span>
+<span id="cb35-423"><a href="#cb35-423" aria-hidden="true" tabindex="-1"></a>sparse_ys <span class="op">=</span> arbitrary(sparse_xs)</span>
+<span id="cb35-424"><a href="#cb35-424" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-425"><a href="#cb35-425" aria-hidden="true" tabindex="-1"></a>fig <span class="op">=</span> px.line(x <span class="op">=</span> xs, y <span class="op">=</span> arbitrary(xs))</span>
+<span id="cb35-426"><a href="#cb35-426" aria-hidden="true" tabindex="-1"></a>fig.add_scatter(x <span class="op">=</span> sparse_xs, y <span class="op">=</span> arbitrary(sparse_xs), mode <span class="op">=</span> <span class="st">"markers"</span>)</span>
+<span id="cb35-427"><a href="#cb35-427" aria-hidden="true" tabindex="-1"></a>fig.update_layout(showlegend<span class="op">=</span> <span class="va">False</span>)</span>
+<span id="cb35-428"><a href="#cb35-428" aria-hidden="true" tabindex="-1"></a>fig.update_layout(autosize<span class="op">=</span><span class="va">False</span>, width<span class="op">=</span><span class="dv">800</span>, height<span class="op">=</span><span class="dv">600</span>)</span>
+<span id="cb35-429"><a href="#cb35-429" aria-hidden="true" tabindex="-1"></a>fig.show()</span>
+<span id="cb35-430"><a href="#cb35-430" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
 <span id="cb35-431"><a href="#cb35-431" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-432"><a href="#cb35-432" aria-hidden="true" tabindex="-1"></a>In other words, the derivative of the function at each point tells us the direction of our next guess.</span>
+<span id="cb35-432"><a href="#cb35-432" aria-hidden="true" tabindex="-1"></a>This basic approach suffers from three major flaws:</span>
 <span id="cb35-433"><a href="#cb35-433" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-434"><a href="#cb35-434" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>A negative slope means we want to step to the right, or move in the *positive* direction. </span>
-<span id="cb35-435"><a href="#cb35-435" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>A positive slope means we want to step to the left, or move in the *negative* direction.</span>
-<span id="cb35-436"><a href="#cb35-436" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-437"><a href="#cb35-437" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Algorithm Attempt 1</span></span>
-<span id="cb35-438"><a href="#cb35-438" aria-hidden="true" tabindex="-1"></a>Armed with this knowledge, let's try to see if we can use the derivative to optimize the function.</span>
+<span id="cb35-434"><a href="#cb35-434" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>If the minimum is outside our range of guesses, the answer will be completely wrong.</span>
+<span id="cb35-435"><a href="#cb35-435" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Even if our range of guesses is correct, if the guesses are too coarse, our answer will be inaccurate.</span>
+<span id="cb35-436"><a href="#cb35-436" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>It is *very* computationally inefficient, considering potentially vast numbers of guesses that are useless.</span>
+<span id="cb35-437"><a href="#cb35-437" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-438"><a href="#cb35-438" aria-hidden="true" tabindex="-1"></a><span class="fu">#### `scipy.optimize.minimize`</span></span>
 <span id="cb35-439"><a href="#cb35-439" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-440"><a href="#cb35-440" aria-hidden="true" tabindex="-1"></a>We start by making some guess for the minimizing value of $x$. Then, we look at the derivative of the function at this value of $x$, and step downhill in the *opposite* direction. We can express our new rule as a recurrence relation:</span>
+<span id="cb35-440"><a href="#cb35-440" aria-hidden="true" tabindex="-1"></a>One way to minimize this mathematical function is to use the <span class="in">`scipy.optimize.minimize`</span> function. It takes a function and a starting guess and tries to find the minimum.</span>
 <span id="cb35-441"><a href="#cb35-441" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-442"><a href="#cb35-442" aria-hidden="true" tabindex="-1"></a>$$x^{(t+1)} = x^{(t)} - \frac{d}{dx} f(x^{(t)})$$</span>
-<span id="cb35-443"><a href="#cb35-443" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-444"><a href="#cb35-444" aria-hidden="true" tabindex="-1"></a>Translating this statement into English: we obtain **our next guess** for the minimizing value of $x$ at timestep $t+1$ ($x^{(t+1)}$) by taking **our last guess** ($x^{(t)}$) and subtracting the **derivative of the function** at that point ($\frac{d}{dx} f(x^{(t)})$).</span>
-<span id="cb35-445"><a href="#cb35-445" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-446"><a href="#cb35-446" aria-hidden="true" tabindex="-1"></a>A few steps are shown below, where the old step is shown as a transparent point, and the next step taken is the green-filled dot.</span>
-<span id="cb35-447"><a href="#cb35-447" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-448"><a href="#cb35-448" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-449"><a href="#cb35-449" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-450"><a href="#cb35-450" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-451"><a href="#cb35-451" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_1.png" alt='grad_descent_2' width='800'&gt;</span>
-<span id="cb35-452"><a href="#cb35-452" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-453"><a href="#cb35-453" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-454"><a href="#cb35-454" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-455"><a href="#cb35-455" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-456"><a href="#cb35-456" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-457"><a href="#cb35-457" aria-hidden="true" tabindex="-1"></a>Looking pretty good! We do have a problem though – once we arrive close to the minimum value of the function, our guesses "bounce" back and forth past the minimum without ever reaching it.</span>
+<span id="cb35-444"><a href="#cb35-444" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-445"><a href="#cb35-445" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> scipy.optimize <span class="im">import</span> minimize</span>
+<span id="cb35-446"><a href="#cb35-446" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-447"><a href="#cb35-447" aria-hidden="true" tabindex="-1"></a><span class="co"># takes a function f and a starting point x0 and returns a readout </span></span>
+<span id="cb35-448"><a href="#cb35-448" aria-hidden="true" tabindex="-1"></a><span class="co"># with the optimal input value of x which minimizes f</span></span>
+<span id="cb35-449"><a href="#cb35-449" aria-hidden="true" tabindex="-1"></a>minimize(arbitrary, x0 <span class="op">=</span> <span class="fl">3.5</span>)</span>
+<span id="cb35-450"><a href="#cb35-450" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-451"><a href="#cb35-451" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-452"><a href="#cb35-452" aria-hidden="true" tabindex="-1"></a><span class="in">`scipy.optimize.minimize`</span> is great. It may also seem a bit magical. How could you write a function that can find the minimum of any mathematical function? There are a number of ways to do this, which we'll explore in today's lecture, eventually arriving at the important idea of **gradient descent**, which is the principle that <span class="in">`scipy.optimize.minimize`</span> uses.</span>
+<span id="cb35-453"><a href="#cb35-453" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-454"><a href="#cb35-454" aria-hidden="true" tabindex="-1"></a>It turns out that under the hood, many model-fitting routines use gradient descent or a closely related iterative method. (One caveat: the <span class="in">`fit`</span> method for scikit-learn's <span class="in">`LinearRegression`</span> models solves the least squares problem directly rather than iterating.) Gradient descent is also how much of machine learning works, including even advanced neural network models. </span>
+<span id="cb35-455"><a href="#cb35-455" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-456"><a href="#cb35-456" aria-hidden="true" tabindex="-1"></a>In Data 100, the gradient descent process will usually be invisible to us, hidden beneath an abstraction layer. However, to be good data scientists, it's important that we know the underlying principles that optimization functions harness to find optimal parameters.</span>
+<span id="cb35-457"><a href="#cb35-457" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb35-458"><a href="#cb35-458" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-459"><a href="#cb35-459" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-460"><a href="#cb35-460" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-461"><a href="#cb35-461" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-462"><a href="#cb35-462" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_2.png" alt='grad_descent_2' width='500'&gt;</span>
-<span id="cb35-463"><a href="#cb35-463" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-464"><a href="#cb35-464" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-465"><a href="#cb35-465" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-466"><a href="#cb35-466" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-467"><a href="#cb35-467" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-468"><a href="#cb35-468" aria-hidden="true" tabindex="-1"></a>In other words, each step we take when updating our guess moves us too far. We can address this by decreasing the size of each step. </span>
+<span id="cb35-459"><a href="#cb35-459" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Digging into Gradient Descent</span></span>
+<span id="cb35-460"><a href="#cb35-460" aria-hidden="true" tabindex="-1"></a>Looking at the function across this domain, it is clear that the function's minimum value occurs around $\theta = 5.3$. Let's pretend for a moment that we *couldn't* see the full view of the cost function. How would we guess the value of $\theta$ that minimizes the function? </span>
+<span id="cb35-461"><a href="#cb35-461" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-462"><a href="#cb35-462" aria-hidden="true" tabindex="-1"></a>It turns out that the first derivative of the function can give us a clue. In the graph below, the function and its derivative are plotted, with points where the derivative is equal to 0 plotted in light green.</span>
+<span id="cb35-463"><a href="#cb35-463" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-466"><a href="#cb35-466" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-467"><a href="#cb35-467" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
+<span id="cb35-468"><a href="#cb35-468" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.graph_objects <span class="im">as</span> go</span>
 <span id="cb35-469"><a href="#cb35-469" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-470"><a href="#cb35-470" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Algorithm Attempt 2</span></span>
-<span id="cb35-471"><a href="#cb35-471" aria-hidden="true" tabindex="-1"></a>Let's update our algorithm to use a **learning rate** (also sometimes called the step size), which controls how far we move with each update. We represent the learning rate with $\alpha$. </span>
+<span id="cb35-470"><a href="#cb35-470" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> derivative_arbitrary(x):</span>
+<span id="cb35-471"><a href="#cb35-471" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (<span class="dv">4</span><span class="op">*</span>x<span class="op">**</span><span class="dv">3</span> <span class="op">-</span> <span class="dv">45</span><span class="op">*</span>x<span class="op">**</span><span class="dv">2</span> <span class="op">+</span> <span class="dv">160</span><span class="op">*</span>x <span class="op">-</span> <span class="dv">180</span>)<span class="op">/</span><span class="dv">10</span></span>
 <span id="cb35-472"><a href="#cb35-472" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-473"><a href="#cb35-473" aria-hidden="true" tabindex="-1"></a>$$x^{(t+1)} = x^{(t)} - \alpha \frac{d}{dx} f(x^{(t)})$$</span>
-<span id="cb35-474"><a href="#cb35-474" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-475"><a href="#cb35-475" aria-hidden="true" tabindex="-1"></a>A small $\alpha$ means that we will take small steps; a large $\alpha$ means we will take large steps. When do we stop updating? We stop updating either after a fixed number of updates or after a subsequent update doesn't change much.</span>
-<span id="cb35-476"><a href="#cb35-476" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-477"><a href="#cb35-477" aria-hidden="true" tabindex="-1"></a>Updating our function to use $\alpha=0.3$, our algorithm successfully **converges** (settles on a solution and stops updating significantly, or at all) on the minimum value.</span>
-<span id="cb35-478"><a href="#cb35-478" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-479"><a href="#cb35-479" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-480"><a href="#cb35-480" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-481"><a href="#cb35-481" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-482"><a href="#cb35-482" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_3.png" alt='grad_descent_3' width='500'&gt;</span>
-<span id="cb35-483"><a href="#cb35-483" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-484"><a href="#cb35-484" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-485"><a href="#cb35-485" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-486"><a href="#cb35-486" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-487"><a href="#cb35-487" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-488"><a href="#cb35-488" aria-hidden="true" tabindex="-1"></a><span class="fu">### Convexity</span></span>
-<span id="cb35-489"><a href="#cb35-489" aria-hidden="true" tabindex="-1"></a>In our analysis above, we focused our attention on the global minimum of the loss function. You may be wondering: what about the local minimum that's just to the left? </span>
-<span id="cb35-490"><a href="#cb35-490" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-491"><a href="#cb35-491" aria-hidden="true" tabindex="-1"></a>If we had chosen a different starting guess for $\theta$, or a different value for the learning rate $\alpha$, our algorithm may have gotten "stuck" and converged on the local minimum, rather than on the true optimum value of loss. </span>
-<span id="cb35-492"><a href="#cb35-492" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-493"><a href="#cb35-493" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-494"><a href="#cb35-494" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-495"><a href="#cb35-495" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-496"><a href="#cb35-496" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/local.png" alt='local' width='600'&gt;</span>
-<span id="cb35-497"><a href="#cb35-497" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-498"><a href="#cb35-498" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-499"><a href="#cb35-499" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-500"><a href="#cb35-500" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-501"><a href="#cb35-501" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-502"><a href="#cb35-502" aria-hidden="true" tabindex="-1"></a>If the loss function is **convex**, gradient descent is guaranteed to converge and find the global minimum of the objective function. Formally, a function $f$ is convex if:</span>
-<span id="cb35-503"><a href="#cb35-503" aria-hidden="true" tabindex="-1"></a>$$tf(a) + (1-t)f(b) \geq f(ta + (1-t)b)$$</span>
-<span id="cb35-504"><a href="#cb35-504" aria-hidden="true" tabindex="-1"></a>for all $a, b$ in the domain of $f$ and $t \in <span class="co">[</span><span class="ot">0, 1</span><span class="co">]</span>$.</span>
-<span id="cb35-505"><a href="#cb35-505" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-506"><a href="#cb35-506" aria-hidden="true" tabindex="-1"></a>To put this into words: if you drew a line between any two points on the curve, all values on the curve must be *on or below* the line. Importantly, any local minimum of a convex function is also its global minimum so we avoid the situation where the algorithm converges on some critical point that is not the minimum of the function.</span>
-<span id="cb35-507"><a href="#cb35-507" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-508"><a href="#cb35-508" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
-<span id="cb35-509"><a href="#cb35-509" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
-<span id="cb35-510"><a href="#cb35-510" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
-<span id="cb35-511"><a href="#cb35-511" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/convex.png" alt='convex' width='600'&gt;</span>
-<span id="cb35-512"><a href="#cb35-512" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
-<span id="cb35-513"><a href="#cb35-513" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
-<span id="cb35-514"><a href="#cb35-514" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
-<span id="cb35-515"><a href="#cb35-515" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
-<span id="cb35-516"><a href="#cb35-516" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-517"><a href="#cb35-517" aria-hidden="true" tabindex="-1"></a>In summary, non-convex loss functions can cause problems with optimization. This means that our choice of loss function is a key factor in our modeling process. It turns out that MSE *is* convex, which is a major reason why it is such a popular choice of loss function. Gradient descent is only guaranteed to converge (given enough iterations and an appropriate step size) for convex functions.</span>
-<span id="cb35-518"><a href="#cb35-518" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-519"><a href="#cb35-519" aria-hidden="true" tabindex="-1"></a><span class="fu">### Gradient Descent in 1 Dimension</span></span>
-<span id="cb35-520"><a href="#cb35-520" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-521"><a href="#cb35-521" aria-hidden="true" tabindex="-1"></a><span class="at">&gt; **Terminology clarification**: In past lectures, we have used “loss” to refer to the error incurred on a *single* datapoint. In applications, we usually care more about the average error across *all* datapoints. Going forward, we will take the “model’s loss” to mean the model’s average error across the dataset. This is sometimes also known as the empirical risk (R), cost function, or objective function. $$L(\theta) = R(\theta) = \frac{1}{n} \sum_{i=1}^{n} L(y, \hat{y})$$</span></span>
+<span id="cb35-473"><a href="#cb35-473" aria-hidden="true" tabindex="-1"></a>fig <span class="op">=</span> go.Figure()</span>
+<span id="cb35-474"><a href="#cb35-474" aria-hidden="true" tabindex="-1"></a>roots <span class="op">=</span> np.array([<span class="fl">2.3927</span>, <span class="fl">3.5309</span>, <span class="fl">5.3263</span>])</span>
+<span id="cb35-475"><a href="#cb35-475" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-476"><a href="#cb35-476" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> xs, y <span class="op">=</span> arbitrary(xs), </span>
+<span id="cb35-477"><a href="#cb35-477" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"lines"</span>, name <span class="op">=</span> <span class="st">"f"</span>))</span>
+<span id="cb35-478"><a href="#cb35-478" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> xs, y <span class="op">=</span> derivative_arbitrary(xs), </span>
+<span id="cb35-479"><a href="#cb35-479" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"lines"</span>, name <span class="op">=</span> <span class="st">"df"</span>, line <span class="op">=</span> {<span class="st">"dash"</span>: <span class="st">"dash"</span>}))</span>
+<span id="cb35-480"><a href="#cb35-480" aria-hidden="true" tabindex="-1"></a>fig.add_trace(go.Scatter(x <span class="op">=</span> np.array(roots), y <span class="op">=</span> <span class="dv">0</span><span class="op">*</span>roots, </span>
+<span id="cb35-481"><a href="#cb35-481" aria-hidden="true" tabindex="-1"></a>                         mode <span class="op">=</span> <span class="st">"markers"</span>, name <span class="op">=</span> <span class="st">"df = zero"</span>, marker_size <span class="op">=</span> <span class="dv">12</span>))</span>
+<span id="cb35-482"><a href="#cb35-482" aria-hidden="true" tabindex="-1"></a>fig.update_layout(font_size <span class="op">=</span> <span class="dv">20</span>, yaxis_range<span class="op">=</span>[<span class="op">-</span><span class="dv">1</span>, <span class="dv">3</span>])</span>
+<span id="cb35-483"><a href="#cb35-483" aria-hidden="true" tabindex="-1"></a>fig.update_layout(autosize<span class="op">=</span><span class="va">False</span>, width<span class="op">=</span><span class="dv">800</span>, height<span class="op">=</span><span class="dv">600</span>)</span>
+<span id="cb35-484"><a href="#cb35-484" aria-hidden="true" tabindex="-1"></a>fig.show()</span>
+<span id="cb35-485"><a href="#cb35-485" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-486"><a href="#cb35-486" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-487"><a href="#cb35-487" aria-hidden="true" tabindex="-1"></a>In the plots below, the line indicates the value of the derivative at each value of $\theta$. The derivative is negative where it is red and positive where it is green.</span>
+<span id="cb35-488"><a href="#cb35-488" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-489"><a href="#cb35-489" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-490"><a href="#cb35-490" aria-hidden="true" tabindex="-1"></a>Say we make a guess for the minimizing value of $\theta$. Remember that we read plots from left to right, and assume that our starting $\theta$ value is to the left of the optimal $\hat{\theta}$. If the guess "undershoots" the true minimizing value – our guess for $\theta$ is lower than the value of the $\hat{\theta}$ that minimizes the function – the derivative will be **negative**. This means that if we increase $\theta$ (move further to the right), then we **can decrease** our loss function further. If this guess "overshoots" the true minimizing value, the derivative will be positive, implying the converse.</span>
+<span id="cb35-491"><a href="#cb35-491" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-492"><a href="#cb35-492" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-493"><a href="#cb35-493" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-494"><a href="#cb35-494" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-495"><a href="#cb35-495" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/step.png" alt='step' width='600'&gt;</span>
+<span id="cb35-496"><a href="#cb35-496" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-497"><a href="#cb35-497" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-498"><a href="#cb35-498" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-499"><a href="#cb35-499" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-500"><a href="#cb35-500" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-501"><a href="#cb35-501" aria-hidden="true" tabindex="-1"></a>We can use this pattern to help formulate our next guess for the optimal $\hat{\theta}$. Consider the case where we've undershot $\hat{\theta}$ by guessing too low of a value. We'll want our next guess to be greater in value than our previous guess – that is, we want to shift our guess to the right. You can think of this as following the slope "downhill" to the function's minimum value.</span>
+<span id="cb35-502"><a href="#cb35-502" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-503"><a href="#cb35-503" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-504"><a href="#cb35-504" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-505"><a href="#cb35-505" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-506"><a href="#cb35-506" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/neg_step.png" alt='neg_step' width='600'&gt;</span>
+<span id="cb35-507"><a href="#cb35-507" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-508"><a href="#cb35-508" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-509"><a href="#cb35-509" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-510"><a href="#cb35-510" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-511"><a href="#cb35-511" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-512"><a href="#cb35-512" aria-hidden="true" tabindex="-1"></a>If we've overshot $\hat{\theta}$ by guessing too high of a value, we'll want our next guess to be lower in value – we want to shift our guess for $\hat{\theta}$ to the left. </span>
+<span id="cb35-513"><a href="#cb35-513" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-514"><a href="#cb35-514" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-515"><a href="#cb35-515" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-516"><a href="#cb35-516" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-517"><a href="#cb35-517" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/pos_step.png" alt='pos_step' width='600'&gt;</span>
+<span id="cb35-518"><a href="#cb35-518" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-519"><a href="#cb35-519" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-520"><a href="#cb35-520" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-521"><a href="#cb35-521" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
 <span id="cb35-522"><a href="#cb35-522" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-523"><a href="#cb35-523" aria-hidden="true" tabindex="-1"></a>In our discussion above, we worked with some arbitrary function $f$. As data scientists, we will almost always work with gradient descent in the context of optimizing *models* – specifically, we want to apply gradient descent to find the minimum of a *loss function*. In a modeling context, our goal is to minimize a loss function by choosing the minimizing model *parameters*.</span>
+<span id="cb35-523"><a href="#cb35-523" aria-hidden="true" tabindex="-1"></a>In other words, the derivative of the function at each point tells us the direction of our next guess.</span>
 <span id="cb35-524"><a href="#cb35-524" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-525"><a href="#cb35-525" aria-hidden="true" tabindex="-1"></a>Recall our modeling workflow from the past few lectures: </span>
-<span id="cb35-526"><a href="#cb35-526" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-527"><a href="#cb35-527" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Define a model with some parameters $\theta_i$</span>
-<span id="cb35-528"><a href="#cb35-528" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Choose a loss function </span>
-<span id="cb35-529"><a href="#cb35-529" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Select the values of $\theta_i$ that minimize the loss function on the data</span>
+<span id="cb35-525"><a href="#cb35-525" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>A negative slope means we want to step to the right, or move in the *positive* direction. </span>
+<span id="cb35-526"><a href="#cb35-526" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>A positive slope means we want to step to the left, or move in the *negative* direction.</span>
+<span id="cb35-527"><a href="#cb35-527" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-528"><a href="#cb35-528" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Algorithm Attempt 1</span></span>
+<span id="cb35-529"><a href="#cb35-529" aria-hidden="true" tabindex="-1"></a>Armed with this knowledge, let's try to see if we can use the derivative to optimize the function.</span>
 <span id="cb35-530"><a href="#cb35-530" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-531"><a href="#cb35-531" aria-hidden="true" tabindex="-1"></a>Gradient descent is a powerful technique for completing this last task. By applying the gradient descent algorithm, we can select values for our parameters $\theta_i$ that will lead to the model having minimal loss on the training data.</span>
+<span id="cb35-531"><a href="#cb35-531" aria-hidden="true" tabindex="-1"></a>We start by making some guess for the minimizing value of $x$. Then, we look at the derivative of the function at this value of $x$, and step downhill in the *opposite* direction. We can express our new rule as a recurrence relation:</span>
 <span id="cb35-532"><a href="#cb35-532" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-533"><a href="#cb35-533" aria-hidden="true" tabindex="-1"></a>When using gradient descent in a modeling context, we:</span>
+<span id="cb35-533"><a href="#cb35-533" aria-hidden="true" tabindex="-1"></a>$$x^{(t+1)} = x^{(t)} - \frac{d}{dx} f(x^{(t)})$$</span>
 <span id="cb35-534"><a href="#cb35-534" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-535"><a href="#cb35-535" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Make guesses for the minimizing $\theta_i$</span>
-<span id="cb35-536"><a href="#cb35-536" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Compute the derivative of the loss function $L$</span>
-<span id="cb35-537"><a href="#cb35-537" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-538"><a href="#cb35-538" aria-hidden="true" tabindex="-1"></a>We can "translate" our gradient descent rule from before by replacing $x$ with $\theta$ and $f$ with $L$:</span>
-<span id="cb35-539"><a href="#cb35-539" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-540"><a href="#cb35-540" aria-hidden="true" tabindex="-1"></a>$$\theta^{(t+1)} = \theta^{(t)} - \alpha \frac{d}{d\theta} L(\theta^{(t)})$$</span>
-<span id="cb35-541"><a href="#cb35-541" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-542"><a href="#cb35-542" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Gradient Descent on the `tips` Dataset </span></span>
-<span id="cb35-543"><a href="#cb35-543" aria-hidden="true" tabindex="-1"></a>To see this in action, let's consider a case where we have a linear model with no offset. We want to predict the tip (y) given the price of a meal (x). To do this, we</span>
-<span id="cb35-544"><a href="#cb35-544" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-545"><a href="#cb35-545" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Choose a model: $\hat{y} = \theta_1 x$,</span>
-<span id="cb35-546"><a href="#cb35-546" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Choose a loss function: $L(\theta) = MSE(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \theta_1x_i)^2$.</span>
+<span id="cb35-535"><a href="#cb35-535" aria-hidden="true" tabindex="-1"></a>Translating this statement into English: we obtain **our next guess** for the minimizing value of $x$ at timestep $t+1$ ($x^{(t+1)}$) by taking **our last guess** ($x^{(t)}$) and subtracting the **derivative of the function** at that point ($\frac{d}{dx} f(x^{(t)})$).</span>
+<span id="cb35-536"><a href="#cb35-536" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-537"><a href="#cb35-537" aria-hidden="true" tabindex="-1"></a>A few steps are shown below, where the old step is shown as a transparent point, and the next step taken is the green-filled dot.</span>
+<span id="cb35-538"><a href="#cb35-538" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-539"><a href="#cb35-539" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-540"><a href="#cb35-540" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-541"><a href="#cb35-541" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-542"><a href="#cb35-542" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_1.png" alt='grad_descent_1' width='800'&gt;</span>
+<span id="cb35-543"><a href="#cb35-543" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-544"><a href="#cb35-544" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-545"><a href="#cb35-545" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-546"><a href="#cb35-546" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
 <span id="cb35-547"><a href="#cb35-547" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-548"><a href="#cb35-548" aria-hidden="true" tabindex="-1"></a>Let's apply our <span class="in">`gradient_descent`</span> function from before to optimize our model on the <span class="in">`tips`</span> dataset. We will try to select the best parameter $\theta_i$ to predict the <span class="in">`tip`</span> $y$ from the <span class="in">`total_bill`</span> $x$.</span>
+<span id="cb35-548"><a href="#cb35-548" aria-hidden="true" tabindex="-1"></a>Looking pretty good! We do have a problem though – once we arrive close to the minimum value of the function, our guesses "bounce" back and forth past the minimum without ever reaching it.</span>
 <span id="cb35-549"><a href="#cb35-549" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-552"><a href="#cb35-552" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-553"><a href="#cb35-553" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> sns.load_dataset(<span class="st">"tips"</span>)</span>
-<span id="cb35-554"><a href="#cb35-554" aria-hidden="true" tabindex="-1"></a>df.head()</span>
-<span id="cb35-555"><a href="#cb35-555" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
-<span id="cb35-556"><a href="#cb35-556" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-557"><a href="#cb35-557" aria-hidden="true" tabindex="-1"></a>We can visualize the value of the MSE on our dataset for different possible choices of $\theta_1$. To optimize our model, we want to select the value of $\theta_1$ that leads to the lowest MSE.</span>
+<span id="cb35-550"><a href="#cb35-550" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-551"><a href="#cb35-551" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-552"><a href="#cb35-552" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-553"><a href="#cb35-553" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_2.png" alt='grad_descent_2' width='500'&gt;</span>
+<span id="cb35-554"><a href="#cb35-554" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-555"><a href="#cb35-555" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-556"><a href="#cb35-556" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-557"><a href="#cb35-557" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
 <span id="cb35-558"><a href="#cb35-558" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-559"><a href="#cb35-559" aria-hidden="true" tabindex="-1"></a>To apply gradient descent, we need to compute the derivative of the loss function with respect to our parameter $\theta_1$.</span>
+<span id="cb35-559"><a href="#cb35-559" aria-hidden="true" tabindex="-1"></a>In other words, each step we take when updating our guess moves us too far. We can address this by decreasing the size of each step. </span>
 <span id="cb35-560"><a href="#cb35-560" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-561"><a href="#cb35-561" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Given our loss function, $$L(\theta) = MSE(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \theta_1x_i)^2$$</span>
-<span id="cb35-562"><a href="#cb35-562" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>We take the derivative with respect to $\theta_1$ $$\frac{\partial}{\partial \theta_{1}} L(\theta_1^{(t)}) = \frac{-2}{n} \sum_{i=1}^n (y_i - \theta_1^{(t)} x_i) x_i$$</span>
-<span id="cb35-563"><a href="#cb35-563" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Which results in the gradient descent update rule</span>
-<span id="cb35-564"><a href="#cb35-564" aria-hidden="true" tabindex="-1"></a>$$\theta_1^{(t+1)} = \theta_1^{(t)} - \alpha \frac{d}{d\theta}L(\theta_1^{(t)})$$</span>
+<span id="cb35-561"><a href="#cb35-561" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Algorithm Attempt 2</span></span>
+<span id="cb35-562"><a href="#cb35-562" aria-hidden="true" tabindex="-1"></a>Let's update our algorithm to use a **learning rate** (also sometimes called the step size), which controls how far we move with each update. We represent the learning rate with $\alpha$. </span>
+<span id="cb35-563"><a href="#cb35-563" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-564"><a href="#cb35-564" aria-hidden="true" tabindex="-1"></a>$$x^{(t+1)} = x^{(t)} - \alpha \frac{d}{dx} f(x^{(t)})$$</span>
 <span id="cb35-565"><a href="#cb35-565" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-566"><a href="#cb35-566" aria-hidden="true" tabindex="-1"></a>for some learning rate $\alpha$.</span>
+<span id="cb35-566"><a href="#cb35-566" aria-hidden="true" tabindex="-1"></a>A small $\alpha$ means that we will take small steps; a large $\alpha$ means we will take large steps. When do we stop updating? We stop updating either after a fixed number of updates or after a subsequent update doesn't change much.</span>
 <span id="cb35-567"><a href="#cb35-567" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-568"><a href="#cb35-568" aria-hidden="true" tabindex="-1"></a>Implementing this in code, we can visualize the MSE loss on the <span class="in">`tips`</span> data. **MSE is convex**, so there is one global minimum.</span>
+<span id="cb35-568"><a href="#cb35-568" aria-hidden="true" tabindex="-1"></a>Updating our function to use $\alpha=0.3$, our algorithm successfully **converges** (settles on a solution and stops updating significantly, or at all) on the minimum value.</span>
 <span id="cb35-569"><a href="#cb35-569" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-572"><a href="#cb35-572" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
-<span id="cb35-573"><a href="#cb35-573" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
-<span id="cb35-574"><a href="#cb35-574" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> gradient_descent(df, initial_guess, alpha, n):</span>
-<span id="cb35-575"><a href="#cb35-575" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Performs n steps of gradient descent on df using learning rate alpha starting</span></span>
-<span id="cb35-576"><a href="#cb35-576" aria-hidden="true" tabindex="-1"></a><span class="co">       from initial_guess. Returns a numpy array of all guesses over time."""</span></span>
-<span id="cb35-577"><a href="#cb35-577" aria-hidden="true" tabindex="-1"></a>    guesses <span class="op">=</span> [initial_guess]</span>
-<span id="cb35-578"><a href="#cb35-578" aria-hidden="true" tabindex="-1"></a>    current_guess <span class="op">=</span> initial_guess</span>
-<span id="cb35-579"><a href="#cb35-579" aria-hidden="true" tabindex="-1"></a>    <span class="cf">while</span> <span class="bu">len</span>(guesses) <span class="op">&lt;</span> n:</span>
-<span id="cb35-580"><a href="#cb35-580" aria-hidden="true" tabindex="-1"></a>        current_guess <span class="op">=</span> current_guess <span class="op">-</span> alpha <span class="op">*</span> df(current_guess)</span>
-<span id="cb35-581"><a href="#cb35-581" aria-hidden="true" tabindex="-1"></a>        guesses.append(current_guess)</span>
-<span id="cb35-582"><a href="#cb35-582" aria-hidden="true" tabindex="-1"></a>        </span>
-<span id="cb35-583"><a href="#cb35-583" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.array(guesses)</span>
-<span id="cb35-584"><a href="#cb35-584" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-585"><a href="#cb35-585" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> mse_single_arg(theta_1):</span>
-<span id="cb35-586"><a href="#cb35-586" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Returns the MSE on our data for the given theta1"""</span></span>
-<span id="cb35-587"><a href="#cb35-587" aria-hidden="true" tabindex="-1"></a>    x <span class="op">=</span> df[<span class="st">"total_bill"</span>]</span>
-<span id="cb35-588"><a href="#cb35-588" aria-hidden="true" tabindex="-1"></a>    y_obs <span class="op">=</span> df[<span class="st">"tip"</span>]</span>
-<span id="cb35-589"><a href="#cb35-589" aria-hidden="true" tabindex="-1"></a>    y_hat <span class="op">=</span> theta_1 <span class="op">*</span> x</span>
-<span id="cb35-590"><a href="#cb35-590" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.mean((y_hat <span class="op">-</span> y_obs) <span class="op">**</span> <span class="dv">2</span>)</span>
-<span id="cb35-591"><a href="#cb35-591" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-592"><a href="#cb35-592" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> mse_loss_derivative_single_arg(theta_1):</span>
-<span id="cb35-593"><a href="#cb35-593" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Returns the derivative of the MSE on our data for the given theta1"""</span></span>
-<span id="cb35-594"><a href="#cb35-594" aria-hidden="true" tabindex="-1"></a>    x <span class="op">=</span> df[<span class="st">"total_bill"</span>]</span>
-<span id="cb35-595"><a href="#cb35-595" aria-hidden="true" tabindex="-1"></a>    y_obs <span class="op">=</span> df[<span class="st">"tip"</span>]</span>
-<span id="cb35-596"><a href="#cb35-596" aria-hidden="true" tabindex="-1"></a>    y_hat <span class="op">=</span> theta_1 <span class="op">*</span> x</span>
-<span id="cb35-597"><a href="#cb35-597" aria-hidden="true" tabindex="-1"></a>    </span>
-<span id="cb35-598"><a href="#cb35-598" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.mean(<span class="dv">2</span> <span class="op">*</span> (y_hat <span class="op">-</span> y_obs) <span class="op">*</span> x)</span>
-<span id="cb35-599"><a href="#cb35-599" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-600"><a href="#cb35-600" aria-hidden="true" tabindex="-1"></a>loss_df <span class="op">=</span> pd.DataFrame({<span class="st">"theta_1"</span>:np.linspace(<span class="op">-</span><span class="fl">1.5</span>, <span class="dv">1</span>), <span class="st">"MSE"</span>:[mse_single_arg(theta_1) <span class="cf">for</span> theta_1 <span class="kw">in</span> np.linspace(<span class="op">-</span><span class="fl">1.5</span>, <span class="dv">1</span>)]})</span>
-<span id="cb35-601"><a href="#cb35-601" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-602"><a href="#cb35-602" aria-hidden="true" tabindex="-1"></a>trajectory <span class="op">=</span> gradient_descent(mse_loss_derivative_single_arg, <span class="op">-</span><span class="fl">0.5</span>, <span class="fl">0.0001</span>, <span class="dv">100</span>)</span>
-<span id="cb35-603"><a href="#cb35-603" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-604"><a href="#cb35-604" aria-hidden="true" tabindex="-1"></a>plt.plot(loss_df[<span class="st">"theta_1"</span>], loss_df[<span class="st">"MSE"</span>])</span>
-<span id="cb35-605"><a href="#cb35-605" aria-hidden="true" tabindex="-1"></a>plt.scatter(trajectory, [mse_single_arg(guess) <span class="cf">for</span> guess <span class="kw">in</span> trajectory], c<span class="op">=</span><span class="st">"white"</span>, edgecolor<span class="op">=</span><span class="st">"firebrick"</span>)</span>
-<span id="cb35-606"><a href="#cb35-606" aria-hidden="true" tabindex="-1"></a>plt.scatter(trajectory[<span class="op">-</span><span class="dv">1</span>], mse_single_arg(trajectory[<span class="op">-</span><span class="dv">1</span>]), c<span class="op">=</span><span class="st">"firebrick"</span>)</span>
-<span id="cb35-607"><a href="#cb35-607" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="vs">r"$\theta_1$"</span>)</span>
-<span id="cb35-608"><a href="#cb35-608" aria-hidden="true" tabindex="-1"></a>plt.ylabel(<span class="vs">r"$L(\theta_1)$"</span>)<span class="op">;</span></span>
+<span id="cb35-570"><a href="#cb35-570" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-571"><a href="#cb35-571" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-572"><a href="#cb35-572" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-573"><a href="#cb35-573" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/grad_descent_3.png" alt='grad_descent_3' width='500'&gt;</span>
+<span id="cb35-574"><a href="#cb35-574" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-575"><a href="#cb35-575" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-576"><a href="#cb35-576" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-577"><a href="#cb35-577" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-578"><a href="#cb35-578" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-579"><a href="#cb35-579" aria-hidden="true" tabindex="-1"></a><span class="fu">### Convexity</span></span>
+<span id="cb35-580"><a href="#cb35-580" aria-hidden="true" tabindex="-1"></a>In our analysis above, we focused our attention on the global minimum of the loss function. You may be wondering: what about the local minimum that's just to the left? </span>
+<span id="cb35-581"><a href="#cb35-581" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-582"><a href="#cb35-582" aria-hidden="true" tabindex="-1"></a>If we had chosen a different starting guess for $\theta$, or a different value for the learning rate $\alpha$, our algorithm may have gotten "stuck" and converged on the local minimum, rather than on the true optimum value of loss. </span>
+<span id="cb35-583"><a href="#cb35-583" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-584"><a href="#cb35-584" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-585"><a href="#cb35-585" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-586"><a href="#cb35-586" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-587"><a href="#cb35-587" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/local.png" alt='local' width='600'&gt;</span>
+<span id="cb35-588"><a href="#cb35-588" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-589"><a href="#cb35-589" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-590"><a href="#cb35-590" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-591"><a href="#cb35-591" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-592"><a href="#cb35-592" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-593"><a href="#cb35-593" aria-hidden="true" tabindex="-1"></a>If the loss function is **convex**, gradient descent is guaranteed to converge and find the global minimum of the objective function. Formally, a function $f$ is convex if:</span>
+<span id="cb35-594"><a href="#cb35-594" aria-hidden="true" tabindex="-1"></a>$$tf(a) + (1-t)f(b) \geq f(ta + (1-t)b)$$</span>
+<span id="cb35-595"><a href="#cb35-595" aria-hidden="true" tabindex="-1"></a>for all $a, b$ in the domain of $f$ and $t \in <span class="co">[</span><span class="ot">0, 1</span><span class="co">]</span>$.</span>
+<span id="cb35-596"><a href="#cb35-596" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-597"><a href="#cb35-597" aria-hidden="true" tabindex="-1"></a>To put this into words: if you drew a line between any two points on the curve, all values on the curve must be *on or below* the line. Importantly, any local minimum of a convex function is also its global minimum so we avoid the situation where the algorithm converges on some critical point that is not the minimum of the function.</span>
+<span id="cb35-598"><a href="#cb35-598" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-599"><a href="#cb35-599" aria-hidden="true" tabindex="-1"></a>&lt;div align="middle"&gt;</span>
+<span id="cb35-600"><a href="#cb35-600" aria-hidden="true" tabindex="-1"></a>  &lt;table style="width:100%"&gt;</span>
+<span id="cb35-601"><a href="#cb35-601" aria-hidden="true" tabindex="-1"></a>    &lt;tr align="center"&gt;</span>
+<span id="cb35-602"><a href="#cb35-602" aria-hidden="true" tabindex="-1"></a>      &lt;td&gt;&lt;img src="images/convex.png" alt='convex' width='600'&gt;</span>
+<span id="cb35-603"><a href="#cb35-603" aria-hidden="true" tabindex="-1"></a>      &lt;/td&gt;</span>
+<span id="cb35-604"><a href="#cb35-604" aria-hidden="true" tabindex="-1"></a>    &lt;/tr&gt;</span>
+<span id="cb35-605"><a href="#cb35-605" aria-hidden="true" tabindex="-1"></a>  &lt;/table&gt;</span>
+<span id="cb35-606"><a href="#cb35-606" aria-hidden="true" tabindex="-1"></a>&lt;/div&gt;</span>
+<span id="cb35-607"><a href="#cb35-607" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-608"><a href="#cb35-608" aria-hidden="true" tabindex="-1"></a>In summary, non-convex loss functions can cause problems with optimization. This means that our choice of loss function is a key factor in our modeling process. It turns out that MSE *is* convex, which is a major reason why it is such a popular choice of loss function. Gradient descent is only guaranteed to converge (given enough iterations and an appropriate step size) for convex functions.</span>
 <span id="cb35-609"><a href="#cb35-609" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb35-610"><a href="#cb35-610" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Final guess for theta_1: </span><span class="sc">{</span>trajectory[<span class="op">-</span><span class="dv">1</span>]<span class="sc">}</span><span class="ss">"</span>)</span>
-<span id="cb35-611"><a href="#cb35-611" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-610"><a href="#cb35-610" aria-hidden="true" tabindex="-1"></a><span class="fu">### Gradient Descent in 1 Dimension</span></span>
+<span id="cb35-611"><a href="#cb35-611" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-612"><a href="#cb35-612" aria-hidden="true" tabindex="-1"></a><span class="at">&gt; **Terminology clarification**: In past lectures, we have used “loss” to refer to the error incurred on a *single* datapoint. In applications, we usually care more about the average error across *all* datapoints. Going forward, we will take the “model’s loss” to mean the model’s average error across the dataset. This is sometimes also known as the empirical risk (R), cost function, or objective function. $$L(\theta) = R(\theta) = \frac{1}{n} \sum_{i=1}^{n} L(y_i, \hat{y}_i)$$</span></span>
+<span id="cb35-613"><a href="#cb35-613" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-614"><a href="#cb35-614" aria-hidden="true" tabindex="-1"></a>In our discussion above, we worked with some arbitrary function $f$. As data scientists, we will almost always work with gradient descent in the context of optimizing *models* – specifically, we want to apply gradient descent to find the minimum of a *loss function*. In a modeling context, our goal is to minimize a loss function by choosing the minimizing model *parameters*.</span>
+<span id="cb35-615"><a href="#cb35-615" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-616"><a href="#cb35-616" aria-hidden="true" tabindex="-1"></a>Recall our modeling workflow from the past few lectures: </span>
+<span id="cb35-617"><a href="#cb35-617" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-618"><a href="#cb35-618" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Define a model with some parameters $\theta_i$</span>
+<span id="cb35-619"><a href="#cb35-619" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Choose a loss function </span>
+<span id="cb35-620"><a href="#cb35-620" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Select the values of $\theta_i$ that minimize the loss function on the data</span>
+<span id="cb35-621"><a href="#cb35-621" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-622"><a href="#cb35-622" aria-hidden="true" tabindex="-1"></a>Gradient descent is a powerful technique for completing this last task. By applying the gradient descent algorithm, we can select values for our parameters $\theta_i$ that will lead to the model having minimal loss on the training data.</span>
+<span id="cb35-623"><a href="#cb35-623" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-624"><a href="#cb35-624" aria-hidden="true" tabindex="-1"></a>When using gradient descent in a modeling context, we:</span>
+<span id="cb35-625"><a href="#cb35-625" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-626"><a href="#cb35-626" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Make guesses for the minimizing $\theta_i$</span>
+<span id="cb35-627"><a href="#cb35-627" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Compute the derivative of the loss function $L$</span>
+<span id="cb35-628"><a href="#cb35-628" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-629"><a href="#cb35-629" aria-hidden="true" tabindex="-1"></a>We can "translate" our gradient descent rule from before by replacing $x$ with $\theta$ and $f$ with $L$:</span>
+<span id="cb35-630"><a href="#cb35-630" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-631"><a href="#cb35-631" aria-hidden="true" tabindex="-1"></a>$$\theta^{(t+1)} = \theta^{(t)} - \alpha \frac{d}{d\theta} L(\theta^{(t)})$$</span>
+<span id="cb35-632"><a href="#cb35-632" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-633"><a href="#cb35-633" aria-hidden="true" tabindex="-1"></a><span class="fu">#### Gradient Descent on the `tips` Dataset </span></span>
+<span id="cb35-634"><a href="#cb35-634" aria-hidden="true" tabindex="-1"></a>To see this in action, let's consider a case where we have a linear model with no offset. We want to predict the tip (y) given the price of a meal (x). To do this, we</span>
+<span id="cb35-635"><a href="#cb35-635" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-636"><a href="#cb35-636" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Choose a model: $\hat{y} = \theta_1 x$,</span>
+<span id="cb35-637"><a href="#cb35-637" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Choose a loss function: $L(\theta) = MSE(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \theta_1x_i)^2$.</span>
+<span id="cb35-638"><a href="#cb35-638" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-639"><a href="#cb35-639" aria-hidden="true" tabindex="-1"></a>Let's apply our <span class="in">`gradient_descent`</span> function from before to optimize our model on the <span class="in">`tips`</span> dataset. We will try to select the best parameter $\theta_1$ to predict the <span class="in">`tip`</span> $y$ from the <span class="in">`total_bill`</span> $x$.</span>
+<span id="cb35-640"><a href="#cb35-640" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-643"><a href="#cb35-643" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-644"><a href="#cb35-644" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> sns.load_dataset(<span class="st">"tips"</span>)</span>
+<span id="cb35-645"><a href="#cb35-645" aria-hidden="true" tabindex="-1"></a>df.head()</span>
+<span id="cb35-646"><a href="#cb35-646" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
+<span id="cb35-647"><a href="#cb35-647" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-648"><a href="#cb35-648" aria-hidden="true" tabindex="-1"></a>We can visualize the value of the MSE on our dataset for different possible choices of $\theta_1$. To optimize our model, we want to select the value of $\theta_1$ that leads to the lowest MSE.</span>
+<span id="cb35-649"><a href="#cb35-649" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-650"><a href="#cb35-650" aria-hidden="true" tabindex="-1"></a>To apply gradient descent, we need to compute the derivative of the loss function with respect to our parameter $\theta_1$.</span>
+<span id="cb35-651"><a href="#cb35-651" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-652"><a href="#cb35-652" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Given our loss function, $$L(\theta) = MSE(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \theta_1x_i)^2$$</span>
+<span id="cb35-653"><a href="#cb35-653" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>We take the derivative with respect to $\theta_1$ $$\frac{\partial}{\partial \theta_{1}} L(\theta_1^{(t)}) = \frac{-2}{n} \sum_{i=1}^n (y_i - \theta_1^{(t)} x_i) x_i$$</span>
+<span id="cb35-654"><a href="#cb35-654" aria-hidden="true" tabindex="-1"></a><span class="ss">* </span>Which results in the gradient descent update rule</span>
+<span id="cb35-655"><a href="#cb35-655" aria-hidden="true" tabindex="-1"></a>$$\theta_1^{(t+1)} = \theta_1^{(t)} - \alpha \frac{\partial}{\partial \theta_1} L(\theta_1^{(t)})$$</span>
+<span id="cb35-656"><a href="#cb35-656" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-657"><a href="#cb35-657" aria-hidden="true" tabindex="-1"></a>for some learning rate $\alpha$.</span>
+<span id="cb35-658"><a href="#cb35-658" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-659"><a href="#cb35-659" aria-hidden="true" tabindex="-1"></a>Implementing this in code, we can visualize the MSE loss on the <span class="in">`tips`</span> data. **MSE is convex**, so there is one global minimum.</span>
+<span id="cb35-660"><a href="#cb35-660" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-663"><a href="#cb35-663" aria-hidden="true" tabindex="-1"></a><span class="in">```{python}</span></span>
+<span id="cb35-664"><a href="#cb35-664" aria-hidden="true" tabindex="-1"></a><span class="co">#| code-fold: true</span></span>
+<span id="cb35-665"><a href="#cb35-665" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> gradient_descent(df, initial_guess, alpha, n):</span>
+<span id="cb35-666"><a href="#cb35-666" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Performs n steps of gradient descent on df using learning rate alpha starting</span></span>
+<span id="cb35-667"><a href="#cb35-667" aria-hidden="true" tabindex="-1"></a><span class="co">       from initial_guess. Returns a numpy array of all guesses over time."""</span></span>
+<span id="cb35-668"><a href="#cb35-668" aria-hidden="true" tabindex="-1"></a>    guesses <span class="op">=</span> [initial_guess]</span>
+<span id="cb35-669"><a href="#cb35-669" aria-hidden="true" tabindex="-1"></a>    current_guess <span class="op">=</span> initial_guess</span>
+<span id="cb35-670"><a href="#cb35-670" aria-hidden="true" tabindex="-1"></a>    <span class="cf">while</span> <span class="bu">len</span>(guesses) <span class="op">&lt;</span> n:</span>
+<span id="cb35-671"><a href="#cb35-671" aria-hidden="true" tabindex="-1"></a>        current_guess <span class="op">=</span> current_guess <span class="op">-</span> alpha <span class="op">*</span> df(current_guess)</span>
+<span id="cb35-672"><a href="#cb35-672" aria-hidden="true" tabindex="-1"></a>        guesses.append(current_guess)</span>
+<span id="cb35-673"><a href="#cb35-673" aria-hidden="true" tabindex="-1"></a>        </span>
+<span id="cb35-674"><a href="#cb35-674" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.array(guesses)</span>
+<span id="cb35-675"><a href="#cb35-675" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-676"><a href="#cb35-676" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> mse_single_arg(theta_1):</span>
+<span id="cb35-677"><a href="#cb35-677" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Returns the MSE on our data for the given theta1"""</span></span>
+<span id="cb35-678"><a href="#cb35-678" aria-hidden="true" tabindex="-1"></a>    x <span class="op">=</span> df[<span class="st">"total_bill"</span>]</span>
+<span id="cb35-679"><a href="#cb35-679" aria-hidden="true" tabindex="-1"></a>    y_obs <span class="op">=</span> df[<span class="st">"tip"</span>]</span>
+<span id="cb35-680"><a href="#cb35-680" aria-hidden="true" tabindex="-1"></a>    y_hat <span class="op">=</span> theta_1 <span class="op">*</span> x</span>
+<span id="cb35-681"><a href="#cb35-681" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.mean((y_hat <span class="op">-</span> y_obs) <span class="op">**</span> <span class="dv">2</span>)</span>
+<span id="cb35-682"><a href="#cb35-682" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-683"><a href="#cb35-683" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> mse_loss_derivative_single_arg(theta_1):</span>
+<span id="cb35-684"><a href="#cb35-684" aria-hidden="true" tabindex="-1"></a>    <span class="co">"""Returns the derivative of the MSE on our data for the given theta1"""</span></span>
+<span id="cb35-685"><a href="#cb35-685" aria-hidden="true" tabindex="-1"></a>    x <span class="op">=</span> df[<span class="st">"total_bill"</span>]</span>
+<span id="cb35-686"><a href="#cb35-686" aria-hidden="true" tabindex="-1"></a>    y_obs <span class="op">=</span> df[<span class="st">"tip"</span>]</span>
+<span id="cb35-687"><a href="#cb35-687" aria-hidden="true" tabindex="-1"></a>    y_hat <span class="op">=</span> theta_1 <span class="op">*</span> x</span>
+<span id="cb35-688"><a href="#cb35-688" aria-hidden="true" tabindex="-1"></a>    </span>
+<span id="cb35-689"><a href="#cb35-689" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> np.mean(<span class="dv">2</span> <span class="op">*</span> (y_hat <span class="op">-</span> y_obs) <span class="op">*</span> x)</span>
+<span id="cb35-690"><a href="#cb35-690" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-691"><a href="#cb35-691" aria-hidden="true" tabindex="-1"></a>loss_df <span class="op">=</span> pd.DataFrame({<span class="st">"theta_1"</span>:np.linspace(<span class="op">-</span><span class="fl">1.5</span>, <span class="dv">1</span>), <span class="st">"MSE"</span>:[mse_single_arg(theta_1) <span class="cf">for</span> theta_1 <span class="kw">in</span> np.linspace(<span class="op">-</span><span class="fl">1.5</span>, <span class="dv">1</span>)]})</span>
+<span id="cb35-692"><a href="#cb35-692" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-693"><a href="#cb35-693" aria-hidden="true" tabindex="-1"></a>trajectory <span class="op">=</span> gradient_descent(mse_loss_derivative_single_arg, <span class="op">-</span><span class="fl">0.5</span>, <span class="fl">0.0001</span>, <span class="dv">100</span>)</span>
+<span id="cb35-694"><a href="#cb35-694" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-695"><a href="#cb35-695" aria-hidden="true" tabindex="-1"></a>plt.plot(loss_df[<span class="st">"theta_1"</span>], loss_df[<span class="st">"MSE"</span>])</span>
+<span id="cb35-696"><a href="#cb35-696" aria-hidden="true" tabindex="-1"></a>plt.scatter(trajectory, [mse_single_arg(guess) <span class="cf">for</span> guess <span class="kw">in</span> trajectory], c<span class="op">=</span><span class="st">"white"</span>, edgecolor<span class="op">=</span><span class="st">"firebrick"</span>)</span>
+<span id="cb35-697"><a href="#cb35-697" aria-hidden="true" tabindex="-1"></a>plt.scatter(trajectory[<span class="op">-</span><span class="dv">1</span>], mse_single_arg(trajectory[<span class="op">-</span><span class="dv">1</span>]), c<span class="op">=</span><span class="st">"firebrick"</span>)</span>
+<span id="cb35-698"><a href="#cb35-698" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="vs">r"$\theta_1$"</span>)</span>
+<span id="cb35-699"><a href="#cb35-699" aria-hidden="true" tabindex="-1"></a>plt.ylabel(<span class="vs">r"$L(\theta_1)$"</span>)<span class="op">;</span></span>
+<span id="cb35-700"><a href="#cb35-700" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb35-701"><a href="#cb35-701" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="ss">f"Final guess for theta_1: </span><span class="sc">{</span>trajectory[<span class="op">-</span><span class="dv">1</span>]<span class="sc">}</span><span class="ss">"</span>)</span>
+<span id="cb35-702"><a href="#cb35-702" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
 </code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div></div></div></div></div>
 </div> <!-- /content -->
diff --git a/docs/gradient_descent/gradient_descent_files/figure-pdf/cell-21-output-2.pdf b/docs/gradient_descent/gradient_descent_files/figure-pdf/cell-21-output-2.pdf
index f2c5f978..2ff49bb9 100644
Binary files a/docs/gradient_descent/gradient_descent_files/figure-pdf/cell-21-output-2.pdf and b/docs/gradient_descent/gradient_descent_files/figure-pdf/cell-21-output-2.pdf differ
diff --git a/docs/gradient_descent/images/ols_matrices_new.png b/docs/gradient_descent/images/ols_matrices_new.png
new file mode 100644
index 00000000..f43c690e
Binary files /dev/null and b/docs/gradient_descent/images/ols_matrices_new.png differ
diff --git a/docs/gradient_descent/images/ols_matrices_old.png b/docs/gradient_descent/images/ols_matrices_old.png
new file mode 100644
index 00000000..52ff46ff
Binary files /dev/null and b/docs/gradient_descent/images/ols_matrices_old.png differ
diff --git a/docs/gradient_descent/images/ols_solution_matrices.png b/docs/gradient_descent/images/ols_solution_matrices.png
new file mode 100644
index 00000000..e3c7a907
Binary files /dev/null and b/docs/gradient_descent/images/ols_solution_matrices.png differ
diff --git a/docs/intro_to_modeling/intro_to_modeling.html b/docs/intro_to_modeling/intro_to_modeling.html
index 910b4ef0..7dd31436 100644
--- a/docs/intro_to_modeling/intro_to_modeling.html
+++ b/docs/intro_to_modeling/intro_to_modeling.html
@@ -418,7 +418,7 @@ <h2 data-number="10.2" class="anchored" data-anchor-id="simple-linear-regression
 <li><span class="math inline">\(\text{regression estimate} = y\text{-intercept} + \text{slope}\cdot\text{}x\)</span></li>
 <li><span class="math inline">\(\text{residual} =\text{observed }y - \text{regression estimate}\)</span></li>
 </ul>
-<div id="37d3410f" class="cell" data-execution_count="1">
+<div id="2de7def4" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -465,7 +465,7 @@ <h4 data-number="10.2.1.2" class="anchored" data-anchor-id="correlation"><span c
 <li>Correlations range between -1 and 1: <span class="math inline">\(|r| \leq 1\)</span>, with <span class="math inline">\(r=1\)</span> indicating perfect positive linear association, and <span class="math inline">\(r=-1\)</span> indicating perfect negative association. The closer <span class="math inline">\(r\)</span> is to <span class="math inline">\(0\)</span>, the weaker the linear association is.</li>
 <li>Correlation says nothing about causation and non-linear association. Correlation does <strong>not</strong> imply causation. When <span class="math inline">\(r = 0\)</span>, the two variables are uncorrelated. However, they could still be related through some non-linear relationship.</li>
 </ol>
-<div id="ac249963" class="cell" data-execution_count="2">
+<div id="637bb451" class="cell" data-execution_count="2">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> plot_and_get_corr(ax, x, y, title):</span>
@@ -689,7 +689,7 @@ <h2 data-number="10.7" class="anchored" data-anchor-id="evaluating-the-slr-model
 <section id="four-mysterious-datasets-anscombes-quartet" class="level3" data-number="10.7.1">
 <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datasets-anscombes-quartet"><span class="header-section-number">10.7.1</span> Four Mysterious Datasets (Anscombe’s quartet)</h3>
 <p>Let’s take a look at four different datasets.</p>
-<div id="5e4cf562" class="cell" data-execution_count="3">
+<div id="ccfdd24c" class="cell" data-execution_count="3">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
@@ -701,7 +701,7 @@ <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datase
 <span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> mpl_toolkits.mplot3d <span class="im">import</span> Axes3D</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="7345dcbe" class="cell" data-execution_count="4">
+<div id="99e9be7b" class="cell" data-execution_count="4">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Big font helper</span></span>
@@ -755,7 +755,7 @@ <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datase
 <span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a>plt.style.use(<span class="st">"default"</span>)  <span class="co"># Revert style to default mpl</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="227be1c1" class="cell" data-execution_count="5">
+<div id="2e81d19c" class="cell" data-execution_count="5">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>plt.style.use(<span class="st">"default"</span>)  <span class="co"># Revert style to default mpl</span></span>
@@ -794,7 +794,7 @@ <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datase
 <span id="cb5-34"><a href="#cb5-34" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> fig</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="f17fbfdb" class="cell" data-execution_count="6">
+<div id="2617f000" class="cell" data-execution_count="6">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Load in four different datasets: I, II, III, IV</span></span>
@@ -837,7 +837,7 @@ <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datase
 </div>
 </div>
 <p>While these four sets of datapoints look very different, they actually all have identical means <span class="math inline">\(\bar x\)</span>, <span class="math inline">\(\bar y\)</span>, standard deviations <span class="math inline">\(\sigma_x\)</span>, <span class="math inline">\(\sigma_y\)</span>, correlation <span class="math inline">\(r\)</span>, and RMSE! If we only look at these statistics, we would probably be inclined to say that these datasets are similar.</p>
-<div id="2f7d68b7" class="cell" data-execution_count="7">
+<div id="dad325ef" class="cell" data-execution_count="7">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> dataset <span class="kw">in</span> [<span class="st">"I"</span>, <span class="st">"II"</span>, <span class="st">"III"</span>, <span class="st">"IV"</span>]:</span>
@@ -884,7 +884,7 @@ <h3 data-number="10.7.1" class="anchored" data-anchor-id="four-mysterious-datase
 </div>
 <p>We may also wish to visualize the model’s <strong>residuals</strong>, defined as the difference between the observed and predicted <span class="math inline">\(y_i\)</span> value (<span class="math inline">\(e_i = y_i - \hat{y}_i\)</span>). This gives a high-level view of how “off” each prediction is from the true observed value. Recall that you explored this concept in <a href="https://inferentialthinking.com/chapters/15/5/Visual_Diagnostics.html?highlight=heteroscedasticity#detecting-heteroscedasticity">Data 8</a>: a good regression fit should display no clear pattern in its plot of residuals. The residual plots for Anscombe’s quartet are displayed below. Note how only the first plot shows no clear pattern to the magnitude of residuals. This is an indication that SLR is not the best choice of model for the remaining three sets of points.</p>
 <!-- <img src="images/residual.png" alt='residual' width='600'> -->
-<div id="828493d8" class="cell" data-execution_count="8">
+<div id="19555d01" class="cell" data-execution_count="8">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Residual visualization</span></span>
diff --git a/docs/intro_to_modeling/intro_to_modeling_files/figure-html/cell-2-output-1.png b/docs/intro_to_modeling/intro_to_modeling_files/figure-html/cell-2-output-1.png
index 75119044..1ad789c2 100644
Binary files a/docs/intro_to_modeling/intro_to_modeling_files/figure-html/cell-2-output-1.png and b/docs/intro_to_modeling/intro_to_modeling_files/figure-html/cell-2-output-1.png differ
diff --git a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-2-output-1.pdf b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-2-output-1.pdf
index 99f056af..595466b9 100644
Binary files a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-2-output-1.pdf and b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-2-output-1.pdf differ
diff --git a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-3-output-1.pdf b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-3-output-1.pdf
index e1832cf2..fd484ba7 100644
Binary files a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-3-output-1.pdf and b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-3-output-1.pdf differ
diff --git a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-7-output-1.pdf b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-7-output-1.pdf
index 739a0d4d..caf4242f 100644
Binary files a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-7-output-1.pdf and b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-7-output-1.pdf differ
diff --git a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-9-output-1.pdf b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-9-output-1.pdf
index b900bfeb..9b0451ab 100644
Binary files a/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-9-output-1.pdf and b/docs/intro_to_modeling/intro_to_modeling_files/figure-pdf/cell-9-output-1.pdf differ
diff --git a/docs/ols/ols.html b/docs/ols/ols.html
index 3dcd3f0a..67225acc 100644
--- a/docs/ols/ols.html
+++ b/docs/ols/ols.html
@@ -361,7 +361,7 @@ <h3 data-number="12.1.1" class="anchored" data-anchor-id="multiple-linear-regres
 <p><span class="math display">\[\hat{y} = \theta_0\:+\:\theta_1x_{1}\:+\:\theta_2 x_{2}\:+\:...\:+\:\theta_p x_{p}\]</span></p>
 <p>Our predicted value of <span class="math inline">\(y\)</span>, <span class="math inline">\(\hat{y}\)</span>, is a linear combination of the single <strong>observations</strong> (features), <span class="math inline">\(x_i\)</span>, and the parameters, <span class="math inline">\(\theta_i\)</span>.</p>
 <p>We can explore this idea further by looking at a dataset containing aggregate per-player data from the 2018-19 NBA season, downloaded from <a href="https://www.kaggle.com/schmadam97/nba-regular-season-stats-20182019">Kaggle</a>.</p>
-<div id="082557ff" class="cell" data-execution_count="1">
+<div id="0008635d" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -369,7 +369,7 @@ <h3 data-number="12.1.1" class="anchored" data-anchor-id="multiple-linear-regres
 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a>nba.index.name <span class="op">=</span> <span class="va">None</span> <span class="co"># Drops name of index (players are ordered by rank)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="25b04c02" class="cell" data-execution_count="2">
+<div id="d484c56a" class="cell" data-execution_count="2">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>nba.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -540,7 +540,7 @@ <h3 data-number="12.1.1" class="anchored" data-anchor-id="multiple-linear-regres
 <li><code>AST</code>, the average number of assists per game</li>
 <li><code>3PA</code>, the average number of 3-point field goals attempted per game</li>
 </ul>
-<div id="8d26de69" class="cell" data-execution_count="3">
+<div id="3fcb0c40" class="cell" data-execution_count="3">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>nba[[<span class="st">'FG'</span>, <span class="st">'AST'</span>, <span class="st">'3PA'</span>, <span class="st">'PTS'</span>]].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
diff --git a/docs/pandas_1/pandas_1.html b/docs/pandas_1/pandas_1.html
index 33750f7b..d8884d45 100644
--- a/docs/pandas_1/pandas_1.html
+++ b/docs/pandas_1/pandas_1.html
@@ -391,7 +391,7 @@ <h2 data-number="2.1" class="anchored" data-anchor-id="tabular-data"><span class
 <section id="series-dataframes-and-indices" class="level2" data-number="2.2">
 <h2 data-number="2.2" class="anchored" data-anchor-id="series-dataframes-and-indices"><span class="header-section-number">2.2</span> <code>Series</code>, <code>DataFrame</code>s, and Indices</h2>
 <p>To begin our work in <code>pandas</code>, we must first import the library into our Python environment. This will allow us to use <code>pandas</code> data structures and methods in our code.</p>
-<div id="ed2ae645" class="cell" data-execution_count="1">
+<div id="773291a9" class="cell" data-execution_count="1">
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># `pd` is the conventional alias for Pandas, as `np` is for NumPy</span></span>
 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
@@ -414,7 +414,7 @@ <h3 data-number="2.2.1" class="anchored" data-anchor-id="series"><span class="he
 <li>A sequence of data labels called the <strong>index</strong>.</li>
 </ul>
 <p>In the cell below, we create a <code>Series</code> named <code>s</code>.</p>
-<div id="42bb57d5" class="cell" data-execution_count="2">
+<div id="b0412198" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>s <span class="op">=</span> pd.Series([<span class="st">"welcome"</span>, <span class="st">"to"</span>, <span class="st">"data 100"</span>])</span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>s</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="2">
@@ -424,14 +424,14 @@ <h3 data-number="2.2.1" class="anchored" data-anchor-id="series"><span class="he
 dtype: object</code></pre>
 </div>
 </div>
-<div id="472dd4cc" class="cell" data-execution_count="3">
+<div id="7124b048" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a> <span class="co"># Accessing data values within the Series</span></span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> s.values</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="3">
 <pre><code>array(['welcome', 'to', 'data 100'], dtype=object)</code></pre>
 </div>
 </div>
-<div id="f39ef6c0" class="cell" data-execution_count="4">
+<div id="6cfd9c3e" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a> <span class="co"># Accessing the Index of the Series</span></span>
 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> s.index</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="4">
@@ -439,7 +439,7 @@ <h3 data-number="2.2.1" class="anchored" data-anchor-id="series"><span class="he
 </div>
 </div>
 <p>By default, the <code>index</code> of a <code>Series</code> is a sequential list of integers beginning from 0. Optionally, a manually specified list of desired indices can be passed to the <code>index</code> argument.</p>
-<div id="440b82c6" class="cell" data-execution_count="5">
+<div id="49c932ed" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>s <span class="op">=</span> pd.Series([<span class="op">-</span><span class="dv">1</span>, <span class="dv">10</span>, <span class="dv">2</span>], index <span class="op">=</span> [<span class="st">"a"</span>, <span class="st">"b"</span>, <span class="st">"c"</span>])</span>
 <span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>s</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="5">
@@ -449,14 +449,14 @@ <h3 data-number="2.2.1" class="anchored" data-anchor-id="series"><span class="he
 dtype: int64</code></pre>
 </div>
 </div>
-<div id="89f8187e" class="cell" data-execution_count="6">
+<div id="285d63fe" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>s.index</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="6">
 <pre><code>Index(['a', 'b', 'c'], dtype='object')</code></pre>
 </div>
 </div>
 <p>Indices can also be changed after initialization.</p>
-<div id="53f5a52a" class="cell" data-execution_count="7">
+<div id="e281824b" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>s.index <span class="op">=</span> [<span class="st">"first"</span>, <span class="st">"second"</span>, <span class="st">"third"</span>]</span>
 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>s</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="7">
@@ -466,7 +466,7 @@ <h3 data-number="2.2.1" class="anchored" data-anchor-id="series"><span class="he
 dtype: int64</code></pre>
 </div>
 </div>
-<div id="03fe03cd" class="cell" data-execution_count="8">
+<div id="a11c9c0f" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>s.index</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="8">
 <pre><code>Index(['first', 'second', 'third'], dtype='object')</code></pre>
@@ -481,7 +481,7 @@ <h4 data-number="2.2.1.1" class="anchored" data-anchor-id="selection-in-series">
 <li>A filtering condition.</li>
 </ol>
 <p>To demonstrate this, let’s define a new Series <code>s</code>.</p>
-<div id="f85485bc" class="cell" data-execution_count="9">
+<div id="005cc7d0" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>s <span class="op">=</span> pd.Series([<span class="dv">4</span>, <span class="op">-</span><span class="dv">2</span>, <span class="dv">0</span>, <span class="dv">6</span>], index <span class="op">=</span> [<span class="st">"a"</span>, <span class="st">"b"</span>, <span class="st">"c"</span>, <span class="st">"d"</span>])</span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>s</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
@@ -494,7 +494,7 @@ <h4 data-number="2.2.1.1" class="anchored" data-anchor-id="selection-in-series">
 </div>
 <section id="a-single-label" class="level5" data-number="2.2.1.1.1">
 <h5 data-number="2.2.1.1.1" class="anchored" data-anchor-id="a-single-label"><span class="header-section-number">2.2.1.1.1</span> A Single Label</h5>
-<div id="9301b25f" class="cell" data-execution_count="10">
+<div id="a6ceef4e" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We return the value stored at the index label "a"</span></span>
 <span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>s[<span class="st">"a"</span>] </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="10">
@@ -504,7 +504,7 @@ <h5 data-number="2.2.1.1.1" class="anchored" data-anchor-id="a-single-label"><sp
 </section>
 <section id="a-list-of-labels" class="level5" data-number="2.2.1.1.2">
 <h5 data-number="2.2.1.1.2" class="anchored" data-anchor-id="a-list-of-labels"><span class="header-section-number">2.2.1.1.2</span> A List of Labels</h5>
-<div id="e625eea2" class="cell" data-execution_count="11">
+<div id="969d1d21" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We return a Series of the values stored at the index labels "a" and "c"</span></span>
 <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>s[[<span class="st">"a"</span>, <span class="st">"c"</span>]] </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="11">
@@ -518,7 +518,7 @@ <h5 data-number="2.2.1.1.2" class="anchored" data-anchor-id="a-list-of-labels"><
 <h5 data-number="2.2.1.1.3" class="anchored" data-anchor-id="a-filtering-condition"><span class="header-section-number">2.2.1.1.3</span> A Filtering Condition</h5>
 <p>Perhaps the most interesting (and useful) method of selecting data from a <code>Series</code> is by using a filtering condition.</p>
 <p>First, we apply a boolean operation to the <code>Series</code>. This creates <strong>a new <code>Series</code> of boolean values</strong>.</p>
-<div id="d349a6ba" class="cell" data-execution_count="12">
+<div id="3aeb5628" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Filter condition: select all elements greater than 0</span></span>
 <span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>s <span class="op">&gt;</span> <span class="dv">0</span> </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="12">
@@ -530,7 +530,7 @@ <h5 data-number="2.2.1.1.3" class="anchored" data-anchor-id="a-filtering-conditi
 </div>
 </div>
 <p>We then use this boolean condition to index into our original <code>Series</code>. <code>pandas</code> will select only the entries in the original <code>Series</code> that satisfy the condition.</p>
-<div id="393b7c4a" class="cell" data-execution_count="13">
+<div id="99b72349" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>s[s <span class="op">&gt;</span> <span class="dv">0</span>] </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="13">
 <pre><code>a    4
@@ -560,7 +560,7 @@ <h4 data-number="2.2.2.1" class="anchored" data-anchor-id="creating-a-dataframe"
 <h5 data-number="2.2.2.1.1" class="anchored" data-anchor-id="from-a-csv-file"><span class="header-section-number">2.2.2.1.1</span> From a CSV file</h5>
 <p>In Data 100, our data are typically stored in a CSV (comma-separated values) file format. We can import a CSV file into a <code>DataFrame</code> by passing the data path as an argument to the following <code>pandas</code> function. <br>  <code>pd.read_csv("filename.csv")</code></p>
 <p>With our new understanding of <code>pandas</code> in hand, let’s return to the <code>elections</code> dataset from before. Now, we can recognize that it is represented as a <code>pandas</code> <code>DataFrame</code>.</p>
-<div id="4374235f" class="cell" data-execution_count="14">
+<div id="4bb8a898" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>elections <span class="op">=</span> pd.read_csv(<span class="st">"data/elections.csv"</span>)</span>
 <span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a>elections</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="14">
@@ -692,7 +692,7 @@ <h5 data-number="2.2.2.1.1" class="anchored" data-anchor-id="from-a-csv-file"><s
 <h5 data-number="2.2.2.1.2" class="anchored" data-anchor-id="using-a-list-and-column-names"><span class="header-section-number">2.2.2.1.2</span> Using a List and Column Name(s)</h5>
 <p>We’ll now explore creating a <code>DataFrame</code> with data of our own.</p>
 <p>Consider the following examples. The first code cell creates a <code>DataFrame</code> with a single column <code>Numbers</code>.</p>
-<div id="fa768721" class="cell" data-execution_count="15">
+<div id="707ae1cc" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb28"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>df_list <span class="op">=</span> pd.DataFrame([<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>], columns<span class="op">=</span>[<span class="st">"Numbers"</span>])</span>
 <span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a>df_list</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="15">
@@ -726,7 +726,7 @@ <h5 data-number="2.2.2.1.2" class="anchored" data-anchor-id="using-a-list-and-co
 </div>
 </div>
 <p>The second creates a <code>DataFrame</code> with the columns <code>Numbers</code> and <code>Description</code>. Notice how a 2D list of values is required to initialize the second <code>DataFrame</code> — each nested list represents a single row of data.</p>
-<div id="b1710789" class="cell" data-execution_count="16">
+<div id="58925d6f" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>df_list <span class="op">=</span> pd.DataFrame([[<span class="dv">1</span>, <span class="st">"one"</span>], [<span class="dv">2</span>, <span class="st">"two"</span>]], columns <span class="op">=</span> [<span class="st">"Number"</span>, <span class="st">"Description"</span>])</span>
 <span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>df_list</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="16">
@@ -763,7 +763,7 @@ <h5 data-number="2.2.2.1.2" class="anchored" data-anchor-id="using-a-list-and-co
 <h5 data-number="2.2.2.1.3" class="anchored" data-anchor-id="from-a-dictionary"><span class="header-section-number">2.2.2.1.3</span> From a Dictionary</h5>
 <p>A third (and more common) way to create a <code>DataFrame</code> is with a dictionary. The dictionary keys represent the column names, and the dictionary values represent the column values.</p>
 <p>Below are two ways of implementing this approach. The first is based on specifying the columns of the <code>DataFrame</code>, whereas the second is based on specifying the rows of the <code>DataFrame</code>.</p>
-<div id="42997310" class="cell" data-execution_count="17">
+<div id="d602d3ee" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb30"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>df_dict <span class="op">=</span> pd.DataFrame({</span>
 <span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a>    <span class="st">"Fruit"</span>: [<span class="st">"Strawberry"</span>, <span class="st">"Orange"</span>], </span>
 <span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a>    <span class="st">"Price"</span>: [<span class="fl">5.49</span>, <span class="fl">3.99</span>]</span>
@@ -798,7 +798,7 @@ <h5 data-number="2.2.2.1.3" class="anchored" data-anchor-id="from-a-dictionary">
 </div>
 </div>
 </div>
-<div id="3d513d45" class="cell" data-execution_count="18">
+<div id="4ac4a0a6" class="cell" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb31"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>df_dict <span class="op">=</span> pd.DataFrame(</span>
 <span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>    [</span>
 <span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>        {<span class="st">"Fruit"</span>:<span class="st">"Strawberry"</span>, <span class="st">"Price"</span>:<span class="fl">5.49</span>}, </span>
@@ -840,14 +840,14 @@ <h5 data-number="2.2.2.1.3" class="anchored" data-anchor-id="from-a-dictionary">
 <h5 data-number="2.2.2.1.4" class="anchored" data-anchor-id="from-a-series"><span class="header-section-number">2.2.2.1.4</span> From a <code>Series</code></h5>
 <p>Earlier, we explained how a <code>Series</code> was synonymous with a column in a <code>DataFrame</code>. It follows, then, that a <code>DataFrame</code> is equivalent to a collection of <code>Series</code>, which all share the same <code>Index</code>.</p>
 <p>In fact, we can initialize a <code>DataFrame</code> by merging two or more <code>Series</code>. Consider the <code>Series</code> <code>s_a</code> and <code>s_b</code>.</p>
-<div id="28120d02" class="cell" data-execution_count="19">
+<div id="064cbf04" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Notice how our indices, or row labels, are the same</span></span>
 <span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a>s_a <span class="op">=</span> pd.Series([<span class="st">"a1"</span>, <span class="st">"a2"</span>, <span class="st">"a3"</span>], index <span class="op">=</span> [<span class="st">"r1"</span>, <span class="st">"r2"</span>, <span class="st">"r3"</span>])</span>
 <span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a>s_b <span class="op">=</span> pd.Series([<span class="st">"b1"</span>, <span class="st">"b2"</span>, <span class="st">"b3"</span>], index <span class="op">=</span> [<span class="st">"r1"</span>, <span class="st">"r2"</span>, <span class="st">"r3"</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>We can turn individual <code>Series</code> into a <code>DataFrame</code> using two common methods (shown below):</p>
-<div id="b649a69b" class="cell" data-execution_count="20">
+<div id="2d35bdbf" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb33"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>pd.DataFrame(s_a)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="20">
 <div>
@@ -879,7 +879,7 @@ <h5 data-number="2.2.2.1.4" class="anchored" data-anchor-id="from-a-series"><spa
 </div>
 </div>
 </div>
-<div id="1203651a" class="cell" data-execution_count="21">
+<div id="05b69412" class="cell" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb34"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>s_b.to_frame()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="21">
 <div>
@@ -912,7 +912,7 @@ <h5 data-number="2.2.2.1.4" class="anchored" data-anchor-id="from-a-series"><spa
 </div>
 </div>
 <p>To merge the two <code>Series</code> and specify their column names, we use the following syntax:</p>
-<div id="ff4b1c9c" class="cell" data-execution_count="22">
+<div id="b50bec83" class="cell" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>pd.DataFrame({</span>
 <span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>    <span class="st">"A-column"</span>: s_a, </span>
 <span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a>    <span class="st">"B-column"</span>: s_b</span>
@@ -957,7 +957,7 @@ <h5 data-number="2.2.2.1.4" class="anchored" data-anchor-id="from-a-series"><spa
 <section id="indices" class="level3" data-number="2.2.3">
 <h3 data-number="2.2.3" class="anchored" data-anchor-id="indices"><span class="header-section-number">2.2.3</span> Indices</h3>
 <p>On a more technical note, an index doesn’t have to be an integer, nor does it have to be unique. For example, we can set the index of the <code>elections</code> <code>DataFrame</code> to be the name of presidential candidates.</p>
-<div id="d2f21c1f" class="cell" data-execution_count="23">
+<div id="663efe9f" class="cell" data-execution_count="23">
 <div class="sourceCode cell-code" id="cb36"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Creating a DataFrame from a CSV file and specifying the index column</span></span>
 <span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>elections <span class="op">=</span> pd.read_csv(<span class="st">"data/elections.csv"</span>, index_col <span class="op">=</span> <span class="st">"Candidate"</span>)</span>
 <span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a>elections</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1081,7 +1081,7 @@ <h3 data-number="2.2.3" class="anchored" data-anchor-id="indices"><span class="h
 </div>
 </div>
 <p>We can also select a new column and set it as the index of the <code>DataFrame</code>. For example, we can set the index of the <code>elections</code> <code>DataFrame</code> to represent the candidate’s party.</p>
-<div id="ffcadc20" class="cell" data-execution_count="24">
+<div id="0128c9c0" class="cell" data-execution_count="24">
 <div class="sourceCode cell-code" id="cb37"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a>elections.reset_index(inplace <span class="op">=</span> <span class="va">True</span>) <span class="co"># Resetting the index so we can set it again</span></span>
 <span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a><span class="co"># This sets the index to the "Party" column</span></span>
 <span id="cb37-3"><a href="#cb37-3" aria-hidden="true" tabindex="-1"></a>elections.set_index(<span class="st">"Party"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1205,7 +1205,7 @@ <h3 data-number="2.2.3" class="anchored" data-anchor-id="indices"><span class="h
 </div>
 </div>
 <p>And, if we’d like, we can revert the index back to the default list of integers.</p>
-<div id="46ff0aac" class="cell" data-execution_count="25">
+<div id="0ec689f7" class="cell" data-execution_count="25">
 <div class="sourceCode cell-code" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This resets the index to be the default list of integers</span></span>
 <span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>elections.reset_index(inplace<span class="op">=</span><span class="va">True</span>) </span>
 <span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a>elections.index</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1228,7 +1228,7 @@ <h3 data-number="2.2.3" class="anchored" data-anchor-id="indices"><span class="h
 <h2 data-number="2.3" class="anchored" data-anchor-id="dataframe-attributes-index-columns-and-shape"><span class="header-section-number">2.3</span> <code>DataFrame</code> Attributes: Index, Columns, and Shape</h2>
 <p>On the other hand, column names in a <code>DataFrame</code> are almost always unique. Looking back to the <code>elections</code> dataset, it wouldn’t make sense to have two columns named <code>"Candidate"</code>. Sometimes, you’ll want to extract these different values, in particular, the list of row and column labels.</p>
 <p>For index/row labels, use <code>DataFrame.index</code>:</p>
-<div id="995e856a" class="cell" data-execution_count="26">
+<div id="eb218bab" class="cell" data-execution_count="26">
 <div class="sourceCode cell-code" id="cb40"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a>elections.set_index(<span class="st">"Party"</span>, inplace <span class="op">=</span> <span class="va">True</span>)</span>
 <span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a>elections.index</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="26">
@@ -1243,14 +1243,14 @@ <h2 data-number="2.3" class="anchored" data-anchor-id="dataframe-attributes-inde
 </div>
 </div>
 <p>For column labels, use <code>DataFrame.columns</code>:</p>
-<div id="8c009b6e" class="cell" data-execution_count="27">
+<div id="1cecbb37" class="cell" data-execution_count="27">
 <div class="sourceCode cell-code" id="cb42"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>elections.columns</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="27">
 <pre><code>Index(['index', 'Candidate', 'Year', 'Popular vote', 'Result', '%'], dtype='object')</code></pre>
 </div>
 </div>
 <p>And for the shape of the <code>DataFrame</code>, we can use <code>DataFrame.shape</code> to get the number of rows followed by the number of columns:</p>
-<div id="96efe826" class="cell" data-execution_count="28">
+<div id="794847e8" class="cell" data-execution_count="28">
 <div class="sourceCode cell-code" id="cb44"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>elections.shape</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="28">
 <pre><code>(182, 6)</code></pre>
@@ -1279,13 +1279,13 @@ <h2 data-number="2.4" class="anchored" data-anchor-id="slicing-in-dataframes"><s
 <h3 data-number="2.4.1" class="anchored" data-anchor-id="extracting-data-with-.head-and-.tail"><span class="header-section-number">2.4.1</span> Extracting data with <code>.head</code> and <code>.tail</code></h3>
 <p>The simplest scenario in which we want to extract data is when we simply want to select the first or last few rows of the <code>DataFrame</code>.</p>
 <p>To extract the first <code>n</code> rows of a <code>DataFrame</code> <code>df</code>, we use the syntax <code>df.head(n)</code>.</p>
-<div id="26d9f88b" class="cell" data-execution_count="29">
+<div id="d2fa9815" class="cell" data-execution_count="29">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb46"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>elections <span class="op">=</span> pd.read_csv(<span class="st">"data/elections.csv"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="f6f16bdd" class="cell" data-execution_count="30">
+<div id="b2e58554" class="cell" data-execution_count="30">
 <div class="sourceCode cell-code" id="cb47"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb47-1"><a href="#cb47-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Extract the first 5 rows of the DataFrame</span></span>
 <span id="cb47-2"><a href="#cb47-2" aria-hidden="true" tabindex="-1"></a>elections.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="30">
@@ -1357,7 +1357,7 @@ <h3 data-number="2.4.1" class="anchored" data-anchor-id="extracting-data-with-.h
 </div>
 </div>
 <p>Similarly, calling <code>df.tail(n)</code> allows us to extract the last <code>n</code> rows of the <code>DataFrame</code>.</p>
-<div id="3848aa94" class="cell" data-execution_count="31">
+<div id="59322303" class="cell" data-execution_count="31">
 <div class="sourceCode cell-code" id="cb48"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Extract the last 5 rows of the DataFrame</span></span>
 <span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a>elections.tail(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="31">
@@ -1443,14 +1443,14 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 <li>A list.</li>
 </ul>
 <p>For example, to select a single value, we can select the row labeled <code>0</code> and the column labeled <code>Candidate</code> from the <code>elections</code> <code>DataFrame</code>.</p>
-<div id="ce83e306" class="cell" data-execution_count="32">
+<div id="48084df6" class="cell" data-execution_count="32">
 <div class="sourceCode cell-code" id="cb49"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a>elections.loc[<span class="dv">0</span>, <span class="st">'Candidate'</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="32">
 <pre><code>'Andrew Jackson'</code></pre>
 </div>
 </div>
 <p>Keep in mind that passing in just one argument as a single value will produce a <code>Series</code>. Below, we’ve extracted a subset of the <code>"Popular vote"</code> column as a <code>Series</code>.</p>
-<div id="bbd76e87" class="cell" data-execution_count="33">
+<div id="7c5cd2ea" class="cell" data-execution_count="33">
 <div class="sourceCode cell-code" id="cb51"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb51-1"><a href="#cb51-1" aria-hidden="true" tabindex="-1"></a>elections.loc[[<span class="dv">87</span>, <span class="dv">25</span>, <span class="dv">179</span>], <span class="st">"Popular vote"</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="33">
 <pre><code>87     15761254
@@ -1460,7 +1460,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 </div>
 <p>Note that if we pass <code>"Popular vote"</code> as a list, the output will be a <code>DataFrame</code>.</p>
-<div id="9f0be5f0" class="cell" data-execution_count="34">
+<div id="6424ba32" class="cell" data-execution_count="34">
 <div class="sourceCode cell-code" id="cb53"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a>elections.loc[[<span class="dv">87</span>, <span class="dv">25</span>, <span class="dv">179</span>], [<span class="st">"Popular vote"</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="34">
 <div>
@@ -1493,7 +1493,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 </div>
 <p>To select <em>multiple</em> rows and columns, we can use Python slice notation. Here, we select the rows from labels <code>0</code> to <code>3</code> and the columns from labels <code>"Year"</code> to <code>"Popular vote"</code>. Notice that unlike Python slicing, <code>.loc</code> is <em>inclusive</em> of the right upper bound.</p>
-<div id="e79126c1" class="cell" data-execution_count="35">
+<div id="856420c4" class="cell" data-execution_count="35">
 <div class="sourceCode cell-code" id="cb54"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a>elections.loc[<span class="dv">0</span>:<span class="dv">3</span>, <span class="st">'Year'</span>:<span class="st">'Popular vote'</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="35">
 <div>
@@ -1545,7 +1545,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 </div>
 <p>Suppose that instead, we want to extract <em>all</em> column values for the first four rows in the <code>elections</code> <code>DataFrame</code>. The shorthand <code>:</code> is useful for this.</p>
-<div id="be1a1d1a" class="cell" data-execution_count="36">
+<div id="549687b2" class="cell" data-execution_count="36">
 <div class="sourceCode cell-code" id="cb55"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb55-1"><a href="#cb55-1" aria-hidden="true" tabindex="-1"></a>elections.loc[<span class="dv">0</span>:<span class="dv">3</span>, :]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="36">
 <div>
@@ -1607,7 +1607,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 </div>
 <p>We can use the same shorthand to extract all rows.</p>
-<div id="3a1cddbc" class="cell" data-execution_count="37">
+<div id="9b9b06e7" class="cell" data-execution_count="37">
 <div class="sourceCode cell-code" id="cb56"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>elections.loc[:, [<span class="st">"Year"</span>, <span class="st">"Candidate"</span>, <span class="st">"Result"</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="37">
 <div>
@@ -1698,7 +1698,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 <p>There are a couple of things we should note. Firstly, unlike conventional Python, <code>pandas</code> allows us to slice string values (in our example, the column labels). Secondly, slicing with <code>.loc</code> is <em>inclusive</em>. Notice how our resulting <code>DataFrame</code> includes every row and column between and including the slice labels we specified.</p>
 <p>Equivalently, we can use a list to obtain multiple rows and columns in our <code>elections</code> <code>DataFrame</code>.</p>
-<div id="1668158e" class="cell" data-execution_count="38">
+<div id="03677bac" class="cell" data-execution_count="38">
 <div class="sourceCode cell-code" id="cb57"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a>elections.loc[[<span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>], [<span class="st">'Year'</span>, <span class="st">'Candidate'</span>, <span class="st">'Party'</span>, <span class="st">'Popular vote'</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="38">
 <div>
@@ -1750,7 +1750,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 </div>
 </div>
 <p>Lastly, we can interchange list and slicing notation.</p>
-<div id="b75a8a66" class="cell" data-execution_count="39">
+<div id="984e62b4" class="cell" data-execution_count="39">
 <div class="sourceCode cell-code" id="cb58"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a>elections.loc[[<span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>], :]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="39">
 <div>
@@ -1816,7 +1816,7 @@ <h3 data-number="2.4.2" class="anchored" data-anchor-id="label-based-extraction-
 <h3 data-number="2.4.3" class="anchored" data-anchor-id="integer-based-extraction-indexing-with-.iloc"><span class="header-section-number">2.4.3</span> Integer-based Extraction: Indexing with <code>.iloc</code></h3>
 <p>Slicing with <code>.iloc</code> works similarly to <code>.loc</code>. However, <code>.iloc</code> uses the <em>index positions</em> of rows and columns rather than the labels (think to yourself: <strong>l</strong>oc uses <strong>l</strong>abels; <strong>i</strong>loc uses <strong>i</strong>ndices). The arguments to the <code>.iloc</code> function also behave similarly — single values, lists, indices, and any combination of these are permitted.</p>
 <p>Let’s begin reproducing our results from above. We’ll begin by selecting the first presidential candidate in our <code>elections</code> <code>DataFrame</code>:</p>
-<div id="6e14178e" class="cell" data-execution_count="40">
+<div id="f0c30203" class="cell" data-execution_count="40">
 <div class="sourceCode cell-code" id="cb59"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb59-1"><a href="#cb59-1" aria-hidden="true" tabindex="-1"></a><span class="co"># elections.loc[0, "Candidate"] - Previous approach</span></span>
 <span id="cb59-2"><a href="#cb59-2" aria-hidden="true" tabindex="-1"></a>elections.iloc[<span class="dv">0</span>, <span class="dv">1</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="40">
@@ -1825,7 +1825,7 @@ <h3 data-number="2.4.3" class="anchored" data-anchor-id="integer-based-extractio
 </div>
 <p>Notice how the first argument to both <code>.loc</code> and <code>.iloc</code> is the same. This is because the row with a label of <code>0</code> is conveniently in the <span class="math inline">\(0^{\text{th}}\)</span> (equivalently, the first) position of the <code>elections</code> <code>DataFrame</code>. Generally, this is true of any <code>DataFrame</code> where the row labels are incremented in ascending order from 0.</p>
 <p>And, as before, if we were to pass in only one single value argument, our result would be a <code>Series</code>.</p>
-<div id="dd722298" class="cell" data-execution_count="41">
+<div id="109cb14e" class="cell" data-execution_count="41">
 <div class="sourceCode cell-code" id="cb61"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb61-1"><a href="#cb61-1" aria-hidden="true" tabindex="-1"></a>elections.iloc[[<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>],<span class="dv">1</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="41">
 <pre><code>1    John Quincy Adams
@@ -1835,7 +1835,7 @@ <h3 data-number="2.4.3" class="anchored" data-anchor-id="integer-based-extractio
 </div>
 </div>
 <p>However, when we select the first four rows and columns using <code>.iloc</code>, we notice something.</p>
-<div id="6329227e" class="cell" data-execution_count="42">
+<div id="f6a5c270" class="cell" data-execution_count="42">
 <div class="sourceCode cell-code" id="cb63"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb63-1"><a href="#cb63-1" aria-hidden="true" tabindex="-1"></a><span class="co"># elections.loc[0:3, 'Year':'Popular vote'] - Previous approach</span></span>
 <span id="cb63-2"><a href="#cb63-2" aria-hidden="true" tabindex="-1"></a>elections.iloc[<span class="dv">0</span>:<span class="dv">4</span>, <span class="dv">0</span>:<span class="dv">4</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="42">
@@ -1889,7 +1889,7 @@ <h3 data-number="2.4.3" class="anchored" data-anchor-id="integer-based-extractio
 </div>
 <p>Slicing is no longer inclusive in <code>.iloc</code> — it’s <em>exclusive</em>. In other words, the right end of a slice is not included when using <code>.iloc</code>. This is one of the subtleties of <code>pandas</code> syntax; you will get used to it with practice.</p>
 <p>List behavior works just as expected.</p>
-<div id="ef558bd3" class="cell" data-execution_count="43">
+<div id="926f26e1" class="cell" data-execution_count="43">
 <div class="sourceCode cell-code" id="cb64"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb64-1"><a href="#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="co">#elections.loc[[0, 1, 2, 3], ['Year', 'Candidate', 'Party', 'Popular vote']] - Previous Approach</span></span>
 <span id="cb64-2"><a href="#cb64-2" aria-hidden="true" tabindex="-1"></a>elections.iloc[[<span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>], [<span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="43">
@@ -1942,7 +1942,7 @@ <h3 data-number="2.4.3" class="anchored" data-anchor-id="integer-based-extractio
 </div>
 </div>
 <p>And just like with <code>.loc</code>, we can use a colon with <code>.iloc</code> to extract all rows or columns.</p>
-<div id="50fbbf95" class="cell" data-execution_count="44">
+<div id="4581a3bb" class="cell" data-execution_count="44">
 <div class="sourceCode cell-code" id="cb65"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb65-1"><a href="#cb65-1" aria-hidden="true" tabindex="-1"></a>elections.iloc[:, <span class="dv">0</span>:<span class="dv">3</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="44">
 <div>
@@ -2050,7 +2050,7 @@ <h3 data-number="2.4.4" class="anchored" data-anchor-id="context-dependent-extra
 <section id="a-slice-of-row-numbers" class="level4" data-number="2.4.4.1">
 <h4 data-number="2.4.4.1" class="anchored" data-anchor-id="a-slice-of-row-numbers"><span class="header-section-number">2.4.4.1</span> A slice of row numbers</h4>
 <p>Say we wanted the first four rows of our <code>elections</code> <code>DataFrame</code>.</p>
-<div id="97cb4710" class="cell" data-execution_count="45">
+<div id="2bcf7edc" class="cell" data-execution_count="45">
 <div class="sourceCode cell-code" id="cb66"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb66-1"><a href="#cb66-1" aria-hidden="true" tabindex="-1"></a>elections[<span class="dv">0</span>:<span class="dv">4</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="45">
 <div>
@@ -2115,7 +2115,7 @@ <h4 data-number="2.4.4.1" class="anchored" data-anchor-id="a-slice-of-row-number
 <section id="a-list-of-column-labels" class="level4" data-number="2.4.4.2">
 <h4 data-number="2.4.4.2" class="anchored" data-anchor-id="a-list-of-column-labels"><span class="header-section-number">2.4.4.2</span> A list of column labels</h4>
 <p>Suppose we now want the first four columns.</p>
-<div id="0a7e4736" class="cell" data-execution_count="46">
+<div id="78cb7e08" class="cell" data-execution_count="46">
 <div class="sourceCode cell-code" id="cb67"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb67-1"><a href="#cb67-1" aria-hidden="true" tabindex="-1"></a>elections[[<span class="st">"Year"</span>, <span class="st">"Candidate"</span>, <span class="st">"Party"</span>, <span class="st">"Popular vote"</span>]]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="46">
 <div>
@@ -2220,7 +2220,7 @@ <h4 data-number="2.4.4.2" class="anchored" data-anchor-id="a-list-of-column-labe
 <section id="a-single-column-label" class="level4" data-number="2.4.4.3">
 <h4 data-number="2.4.4.3" class="anchored" data-anchor-id="a-single-column-label"><span class="header-section-number">2.4.4.3</span> A single-column label</h4>
 <p>Lastly, <code>[]</code> allows us to extract only the <code>"Candidate"</code> column.</p>
-<div id="4d0c8904" class="cell" data-execution_count="47">
+<div id="469919d6" class="cell" data-execution_count="47">
 <div class="sourceCode cell-code" id="cb68"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb68-1"><a href="#cb68-1" aria-hidden="true" tabindex="-1"></a>elections[<span class="st">"Candidate"</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="47">
 <pre><code>0         Andrew Jackson
diff --git a/docs/pandas_2/pandas_2.html b/docs/pandas_2/pandas_2.html
index e97dc2fc..8163082c 100644
--- a/docs/pandas_2/pandas_2.html
+++ b/docs/pandas_2/pandas_2.html
@@ -316,7 +316,7 @@ <h1 class="title"><span class="chapter-number">3</span>&nbsp; <span class="chapt
 <p>Last time, we introduced the <code>pandas</code> library as a toolkit for processing data. We learned the <code>DataFrame</code> and <code>Series</code> data structures, familiarized ourselves with the basic syntax for manipulating tabular data, and began writing our first lines of <code>pandas</code> code.</p>
 <p>In this lecture, we’ll start to dive into some advanced <code>pandas</code> syntax. You may find it helpful to follow along with a notebook of your own as we walk through these new pieces of code.</p>
 <p>We’ll start by loading the <code>babynames</code> dataset.</p>
-<div id="4a704d7c" class="cell" data-execution_count="1">
+<div id="a58a7f24" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This code pulls census data and loads it into a DataFrame</span></span>
@@ -409,7 +409,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 <p>Conditional selection allows us to select a subset of rows in a <code>DataFrame</code> that satisfy some specified condition.</p>
 <p>To understand how to use conditional selection, we must look at another possible input of the <code>.loc</code> and <code>[]</code> methods – a boolean array, which is simply an array or <code>Series</code> where each element is either <code>True</code> or <code>False</code>. This boolean array must have a length equal to the number of rows in the <code>DataFrame</code>. It will return all rows that correspond to a value of <code>True</code> in the array. We used a very similar technique when performing conditional extraction from a <code>Series</code> in the last lecture.</p>
 <p>To see this in action, let’s select all even-indexed rows in the first 10 rows of our <code>DataFrame</code>.</p>
-<div id="5f227187" class="cell" data-execution_count="2">
+<div id="4cb2b411" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Ask yourself: why is :9 the correct slice to select the first 10 rows?</span></span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>babynames_first_10_rows <span class="op">=</span> babynames.loc[:<span class="dv">9</span>, :]</span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -478,7 +478,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 </div>
 <p>We can perform a similar operation using <code>.loc</code>.</p>
-<div id="03e209c0" class="cell" data-execution_count="3">
+<div id="a1b8d934" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>babynames_first_10_rows.loc[[<span class="va">True</span>, <span class="va">False</span>, <span class="va">True</span>, <span class="va">False</span>, <span class="va">True</span>, <span class="va">False</span>, <span class="va">True</span>, <span class="va">False</span>, <span class="va">True</span>, <span class="va">False</span>], :]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="3">
 <div>
@@ -544,7 +544,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 <p>These techniques worked well in this example, but you can imagine how tedious it might be to list out <code>True</code> and <code>False</code> for every row in a larger <code>DataFrame</code>. To make things easier, we can instead provide a logical condition as an input to <code>.loc</code> or <code>[]</code> that returns a boolean array with the necessary length.</p>
 <p>For example, to return all names associated with <code>F</code> sex:</p>
-<div id="287d0729" class="cell" data-execution_count="4">
+<div id="4ec75747" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># First, use a logical condition to generate a boolean array</span></span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>logical_operator <span class="op">=</span> (babynames[<span class="st">"Sex"</span>] <span class="op">==</span> <span class="st">"F"</span>)</span>
 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -614,7 +614,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 <p>Recall from the previous lecture that <code>.head()</code> will return only the first few rows in the <code>DataFrame</code>. In reality, <code>babynames[logical_operator]</code> contains as many rows as there are entries in the original <code>babynames</code> <code>DataFrame</code> with sex <code>"F"</code>.</p>
 <p>Here, <code>logical_operator</code> evaluates to a <code>Series</code> of boolean values with length 407428.</p>
-<div id="a6117f99" class="cell" data-execution_count="5">
+<div id="7eaac15b" class="cell" data-execution_count="5">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">"There are a total of </span><span class="sc">{}</span><span class="st"> values in 'logical_operator'"</span>.<span class="bu">format</span>(<span class="bu">len</span>(logical_operator)))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -624,7 +624,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 </div>
 <p>Rows starting at row 0 and ending at row 239536 evaluate to <code>True</code> and are thus returned in the <code>DataFrame</code>. Rows from 239537 onwards evaluate to <code>False</code> and are omitted from the output.</p>
-<div id="826b061e" class="cell" data-execution_count="6">
+<div id="71136b7b" class="cell" data-execution_count="6">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">"The 0th item in this 'logical_operator' is: </span><span class="sc">{}</span><span class="st">"</span>.<span class="bu">format</span>(logical_operator.iloc[<span class="dv">0</span>]))</span>
@@ -639,7 +639,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 <p>Passing a <code>Series</code> as an argument to <code>babynames[]</code> has the same effect as using a boolean array. In fact, the <code>[]</code> selection operator can take a boolean <code>Series</code>, array, or list as its argument. These three are used interchangeably throughout the course.</p>
 <p>We can also use <code>.loc</code> to achieve similar results.</p>
-<div id="1e395429" class="cell" data-execution_count="7">
+<div id="89d2fda1" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>babynames.loc[babynames[<span class="st">"Sex"</span>] <span class="op">==</span> <span class="st">"F"</span>].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="7">
 <div>
@@ -737,7 +737,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </table>
 <p>When combining multiple conditions with logical operators, we surround each individual condition with a set of parentheses <code>()</code>. This imposes an order of operations on how <code>pandas</code> evaluates your logic and helps avoid errors.</p>
 <p>For example, if we want to return data on all names with sex <code>"F"</code> born before the year 2000, we can write:</p>
-<div id="c5c86484" class="cell" data-execution_count="8">
+<div id="8a397722" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>babynames[(babynames[<span class="st">"Sex"</span>] <span class="op">==</span> <span class="st">"F"</span>) <span class="op">&amp;</span> (babynames[<span class="st">"Year"</span>] <span class="op">&lt;</span> <span class="dv">2000</span>)].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="8">
 <div>
@@ -802,12 +802,12 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 </div>
 <p>Note that we’re working with <code>Series</code>, so using <code>and</code> in place of <code>&amp;</code>, or <code>or</code> in place of <code>|</code>, will raise an error.</p>
-<div id="e3a47e56" class="cell" data-execution_count="9">
+<div id="bf8a7c59" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This line of code will raise a ValueError</span></span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="co"># babynames[(babynames["Sex"] == "F") and (babynames["Year"] &lt; 2000)].head()</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>If we want to return data on all names with sex <code>"F"</code> <em>or</em> all born before the year 2000, we can write:</p>
-<div id="dfc79140" class="cell" data-execution_count="10">
+<div id="59b7b584" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>babynames[(babynames[<span class="st">"Sex"</span>] <span class="op">==</span> <span class="st">"F"</span>) <span class="op">|</span> (babynames[<span class="st">"Year"</span>] <span class="op">&lt;</span> <span class="dv">2000</span>)].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="10">
 <div>
@@ -872,7 +872,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 </div>
 <p>Boolean array selection is a useful tool, but can lead to overly verbose code for complex conditions. In the example below, our boolean condition is long enough to extend for several lines of code.</p>
-<div id="30418504" class="cell" data-execution_count="11">
+<div id="0a915822" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Note: The parentheses surrounding the code make it possible to break the code on to multiple lines for readability</span></span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>(</span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>    babynames[(babynames[<span class="st">"Name"</span>] <span class="op">==</span> <span class="st">"Bella"</span>) <span class="op">|</span> </span>
@@ -944,7 +944,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 <p>Fortunately, <code>pandas</code> provides many alternative methods for constructing boolean filters.</p>
 <p>The <code>.isin</code> function is one such example. This method evaluates if the values in a <code>Series</code> are contained in a different sequence (list, array, or <code>Series</code>) of values. In the cell below, we achieve equivalent results to the <code>DataFrame</code> above with far more concise code.</p>
-<div id="c1d98290" class="cell" data-execution_count="12">
+<div id="75ac772e" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>names <span class="op">=</span> [<span class="st">"Bella"</span>, <span class="st">"Alex"</span>, <span class="st">"Narges"</span>, <span class="st">"Lisa"</span>]</span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Name"</span>].isin(names).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="12">
@@ -956,7 +956,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 Name: Name, dtype: bool</code></pre>
 </div>
 </div>
-<div id="0161b33d" class="cell" data-execution_count="13">
+<div id="ab89043f" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>babynames[babynames[<span class="st">"Name"</span>].isin(names)].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="13">
 <div>
@@ -1021,7 +1021,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 </div>
 </div>
 <p>The function <code>str.startswith</code> can be used to define a filter based on string values in a <code>Series</code> object. It checks to see if string values in a <code>Series</code> start with a particular character.</p>
-<div id="92959235" class="cell" data-execution_count="14">
+<div id="f17931fe" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Identify whether names begin with the letter "N"</span></span>
 <span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Name"</span>].<span class="bu">str</span>.startswith(<span class="st">"N"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="14">
@@ -1033,7 +1033,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 Name: Name, dtype: bool</code></pre>
 </div>
 </div>
-<div id="efebd39b" class="cell" data-execution_count="15">
+<div id="f38b8877" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Extracting names that begin with the letter "N"</span></span>
 <span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>babynames[babynames[<span class="st">"Name"</span>].<span class="bu">str</span>.startswith(<span class="st">"N"</span>)].head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="15">
@@ -1103,7 +1103,7 @@ <h2 data-number="3.1" class="anchored" data-anchor-id="conditional-selection"><s
 <h2 data-number="3.2" class="anchored" data-anchor-id="adding-removing-and-modifying-columns"><span class="header-section-number">3.2</span> Adding, Removing, and Modifying Columns</h2>
 <p>In many data science tasks, we may need to change the columns contained in our <code>DataFrame</code> in some way. Fortunately, the syntax to do so is fairly straightforward.</p>
 <p>To add a new column to a <code>DataFrame</code>, we use a syntax similar to that used when accessing an existing column. Specify the name of the new column by writing <code>df["column"]</code>, then assign this to a <code>Series</code> or array containing the values that will populate this column.</p>
-<div id="899b5917" class="cell" data-execution_count="16">
+<div id="a9d23b57" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a Series of the length of each name. </span></span>
 <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>babyname_lengths <span class="op">=</span> babynames[<span class="st">"Name"</span>].<span class="bu">str</span>.<span class="bu">len</span>()</span>
 <span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1179,7 +1179,7 @@ <h2 data-number="3.2" class="anchored" data-anchor-id="adding-removing-and-modif
 </div>
 </div>
 <p>If we need to later modify an existing column, we can do so by referencing this column again with the syntax <code>df["column"]</code>, then re-assigning it to a new <code>Series</code> or array of the appropriate length.</p>
-<div id="e7784c58" class="cell" data-execution_count="17">
+<div id="2978ef40" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Modify the “name_lengths” column to be one less than its original value</span></span>
 <span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"name_lengths"</span>] <span class="op">=</span> babynames[<span class="st">"name_lengths"</span>] <span class="op">-</span> <span class="dv">1</span></span>
 <span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a>babynames.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1252,7 +1252,7 @@ <h2 data-number="3.2" class="anchored" data-anchor-id="adding-removing-and-modif
 </div>
 </div>
 <p>We can rename a column using the <code>.rename()</code> method. It takes in a dictionary that maps old column names to their new ones.</p>
-<div id="47b90ba1" class="cell" data-execution_count="18">
+<div id="d2c2880d" class="cell" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Rename “name_lengths” to “Length”</span></span>
 <span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>babynames <span class="op">=</span> babynames.rename(columns<span class="op">=</span>{<span class="st">"name_lengths"</span>:<span class="st">"Length"</span>})</span>
 <span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>babynames.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1325,7 +1325,7 @@ <h2 data-number="3.2" class="anchored" data-anchor-id="adding-removing-and-modif
 </div>
 </div>
 <p>If we want to remove a column or row of a <code>DataFrame</code>, we can call the <code>.drop</code> <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html">(documentation)</a> method. Use the <code>axis</code> parameter to specify whether a column or row should be dropped. Unless otherwise specified, <code>pandas</code> will assume that we are dropping a row by default.</p>
-<div id="9bd875e9" class="cell" data-execution_count="19">
+<div id="2765a0f0" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Drop our new "Length" column from the DataFrame</span></span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>babynames <span class="op">=</span> babynames.drop(<span class="st">"Length"</span>, axis<span class="op">=</span><span class="st">"columns"</span>)</span>
 <span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a>babynames.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1393,7 +1393,7 @@ <h2 data-number="3.2" class="anchored" data-anchor-id="adding-removing-and-modif
 </div>
 <p>Notice that we <em>re-assigned</em> <code>babynames</code> to the result of <code>babynames.drop(...)</code>. This is a subtle but important point: <code>pandas</code> table operations <strong>do not occur in-place</strong>. Calling <code>df.drop(...)</code> will output a <em>copy</em> of <code>df</code> with the row/column of interest removed without modifying the original <code>df</code> table.</p>
 <p>In other words, if we simply call:</p>
-<div id="c3872306" class="cell" data-execution_count="20">
+<div id="32ff2390" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This creates a copy of `babynames` and removes the column "Name"...</span></span>
 <span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a>babynames.drop(<span class="st">"Name"</span>, axis<span class="op">=</span><span class="st">"columns"</span>)</span>
 <span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1481,7 +1481,7 @@ <h2 data-number="3.3" class="anchored" data-anchor-id="useful-utility-functions"
 <section id="numpy" class="level3" data-number="3.3.1">
 <h3 data-number="3.3.1" class="anchored" data-anchor-id="numpy"><span class="header-section-number">3.3.1</span> <code>NumPy</code></h3>
 <p><code>pandas</code> is designed to work well with <code>NumPy</code>, the framework for array computations you encountered in <a href="https://www.data8.org/su23/reference/#array-functions-and-methods">Data 8</a>. Just about any <code>NumPy</code> function can be applied to <code>pandas</code> <code>DataFrame</code>s and <code>Series</code>.</p>
-<div id="d046ca57" class="cell" data-execution_count="21">
+<div id="7fa49f3a" class="cell" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Pull out the number of babies named Yash each year</span></span>
 <span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a>yash_count <span class="op">=</span> babynames[babynames[<span class="st">"Name"</span>] <span class="op">==</span> <span class="st">"Yash"</span>][<span class="st">"Count"</span>]</span>
 <span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a>yash_count.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1494,14 +1494,14 @@ <h3 data-number="3.3.1" class="anchored" data-anchor-id="numpy"><span class="hea
 Name: Count, dtype: int64</code></pre>
 </div>
 </div>
-<div id="9765b3af" class="cell" data-execution_count="22">
+<div id="b0868101" class="cell" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Average number of babies named Yash each year</span></span>
 <span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a>np.mean(yash_count)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="22">
 <pre><code>np.float64(17.142857142857142)</code></pre>
 </div>
 </div>
-<div id="e474fc0f" class="cell" data-execution_count="23">
+<div id="0ee86ad4" class="cell" data-execution_count="23">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Max number of babies named Yash born in any one year</span></span>
 <span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>np.<span class="bu">max</span>(yash_count)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="23">
@@ -1513,14 +1513,14 @@ <h3 data-number="3.3.1" class="anchored" data-anchor-id="numpy"><span class="hea
 <h3 data-number="3.3.2" class="anchored" data-anchor-id="shape-and-.size"><span class="header-section-number">3.3.2</span> <code>.shape</code> and <code>.size</code></h3>
 <p><code>.shape</code> and <code>.size</code> are attributes of <code>Series</code> and <code>DataFrame</code>s that measure the “amount” of data stored in the structure. Calling <code>.shape</code> returns a tuple containing the number of rows and columns present in the <code>DataFrame</code> or <code>Series</code>. <code>.size</code> is used to find the total number of elements in a structure, equivalent to the number of rows times the number of columns.</p>
 <p>Many functions strictly require the dimensions of the arguments along certain axes to match. Calling these dimension-finding functions is much faster than counting all of the items by hand.</p>
-<div id="a979fa01" class="cell" data-execution_count="24">
+<div id="823c4fe0" class="cell" data-execution_count="24">
 <div class="sourceCode cell-code" id="cb31"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Return the shape of the DataFrame, in the format (num_rows, num_columns)</span></span>
 <span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>babynames.shape</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="24">
 <pre><code>(407428, 5)</code></pre>
 </div>
 </div>
-<div id="e6637b8d" class="cell" data-execution_count="25">
+<div id="2c8cd3b7" class="cell" data-execution_count="25">
 <div class="sourceCode cell-code" id="cb33"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Return the size of the DataFrame, equal to num_rows * num_columns</span></span>
 <span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a>babynames.size</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="25">
@@ -1531,7 +1531,7 @@ <h3 data-number="3.3.2" class="anchored" data-anchor-id="shape-and-.size"><span
 <section id="describe" class="level3" data-number="3.3.3">
 <h3 data-number="3.3.3" class="anchored" data-anchor-id="describe"><span class="header-section-number">3.3.3</span> <code>.describe()</code></h3>
 <p>If many statistics are required from a <code>DataFrame</code> (minimum value, maximum value, mean value, etc.), then <code>.describe()</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html">(documentation)</a> can be used to compute all of them at once.</p>
-<div id="13593cdd" class="cell" data-execution_count="26">
+<div id="e0822293" class="cell" data-execution_count="26">
 <div class="sourceCode cell-code" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>babynames.describe()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="26">
 <div>
@@ -1593,7 +1593,7 @@ <h3 data-number="3.3.3" class="anchored" data-anchor-id="describe"><span class="
 </div>
 </div>
 <p>A different set of statistics will be reported if <code>.describe()</code> is called on a <code>Series</code>.</p>
-<div id="5aa8b58a" class="cell" data-execution_count="27">
+<div id="ae04a2ca" class="cell" data-execution_count="27">
 <div class="sourceCode cell-code" id="cb36"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Sex"</span>].describe()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="27">
 <pre><code>count     407428
@@ -1608,7 +1608,7 @@ <h3 data-number="3.3.3" class="anchored" data-anchor-id="describe"><span class="
 <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="header-section-number">3.3.4</span> <code>.sample()</code></h3>
 <p>As we will see later in the semester, random processes are at the heart of many data science techniques (for example, train-test splits, bootstrapping, and cross-validation). <code>.sample()</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html">(documentation)</a> lets us quickly select random entries (a row if called from a <code>DataFrame</code>, or a value if called from a <code>Series</code>).</p>
 <p>By default, <code>.sample()</code> selects entries <em>without</em> replacement. Pass in the argument <code>replace=True</code> to sample with replacement.</p>
-<div id="4fdfed3d" class="cell" data-execution_count="28">
+<div id="d1ae55fc" class="cell" data-execution_count="28">
 <div class="sourceCode cell-code" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Sample a single row</span></span>
 <span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>babynames.sample()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="28">
@@ -1628,12 +1628,12 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 </thead>
 <tbody>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">298056</td>
+<td data-quarto-table-cell-role="th">199745</td>
 <td>CA</td>
-<td>M</td>
-<td>1980</td>
-<td>Maxwell</td>
-<td>34</td>
+<td>F</td>
+<td>2012</td>
+<td>Dream</td>
+<td>10</td>
 </tr>
 </tbody>
 </table>
@@ -1642,7 +1642,7 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 </div>
 </div>
 <p>Naturally, this can be chained with other methods and operators (<code>iloc</code>, etc.).</p>
-<div id="72923003" class="cell" data-execution_count="29">
+<div id="8e710ede" class="cell" data-execution_count="29">
 <div class="sourceCode cell-code" id="cb39"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Sample 5 random rows, and select all columns after column 2</span></span>
 <span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a>babynames.sample(<span class="dv">5</span>).iloc[:, <span class="dv">2</span>:]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="29">
@@ -1660,34 +1660,34 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 </thead>
 <tbody>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">356988</td>
-<td>2005</td>
-<td>Torin</td>
-<td>10</td>
+<td data-quarto-table-cell-role="th">53849</td>
+<td>1966</td>
+<td>Stefani</td>
+<td>20</td>
 </tr>
 <tr class="even">
-<td data-quarto-table-cell-role="th">353698</td>
-<td>2004</td>
-<td>Jaren</td>
-<td>18</td>
+<td data-quarto-table-cell-role="th">176617</td>
+<td>2006</td>
+<td>Sabah</td>
+<td>5</td>
 </tr>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">37458</td>
-<td>1957</td>
-<td>Candace</td>
-<td>106</td>
+<td data-quarto-table-cell-role="th">334208</td>
+<td>1996</td>
+<td>Levon</td>
+<td>8</td>
 </tr>
 <tr class="even">
-<td data-quarto-table-cell-role="th">161997</td>
-<td>2003</td>
-<td>Kimberlee</td>
-<td>17</td>
+<td data-quarto-table-cell-role="th">22777</td>
+<td>1946</td>
+<td>Lynne</td>
+<td>197</td>
 </tr>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">80722</td>
-<td>1979</td>
-<td>Nancy</td>
-<td>641</td>
+<td data-quarto-table-cell-role="th">88619</td>
+<td>1982</td>
+<td>Martha</td>
+<td>316</td>
 </tr>
 </tbody>
 </table>
@@ -1695,7 +1695,7 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 </div>
 </div>
 </div>
-<div id="80303402" class="cell" data-execution_count="30">
+<div id="35990d76" class="cell" data-execution_count="30">
 <div class="sourceCode cell-code" id="cb40"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Randomly sample 4 names from the year 2000, with replacement, and select all columns after column 2</span></span>
 <span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a>babynames[babynames[<span class="st">"Year"</span>] <span class="op">==</span> <span class="dv">2000</span>].sample(<span class="dv">4</span>, replace <span class="op">=</span> <span class="va">True</span>).iloc[:, <span class="dv">2</span>:]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="30">
@@ -1713,28 +1713,28 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 </thead>
 <tbody>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">343065</td>
+<td data-quarto-table-cell-role="th">343989</td>
 <td>2000</td>
-<td>Koby</td>
-<td>35</td>
+<td>Adithya</td>
+<td>8</td>
 </tr>
 <tr class="even">
-<td data-quarto-table-cell-role="th">149148</td>
+<td data-quarto-table-cell-role="th">343085</td>
 <td>2000</td>
-<td>Morgan</td>
-<td>499</td>
+<td>Emerson</td>
+<td>33</td>
 </tr>
 <tr class="odd">
-<td data-quarto-table-cell-role="th">151307</td>
+<td data-quarto-table-cell-role="th">342772</td>
 <td>2000</td>
-<td>Janna</td>
-<td>9</td>
+<td>Derrick</td>
+<td>112</td>
 </tr>
 <tr class="even">
-<td data-quarto-table-cell-role="th">150077</td>
+<td data-quarto-table-cell-role="th">343379</td>
 <td>2000</td>
-<td>Alycia</td>
-<td>28</td>
+<td>Johann</td>
+<td>18</td>
 </tr>
 </tbody>
 </table>
@@ -1747,7 +1747,7 @@ <h3 data-number="3.3.4" class="anchored" data-anchor-id="sample"><span class="he
 <h3 data-number="3.3.5" class="anchored" data-anchor-id="value_counts"><span class="header-section-number">3.3.5</span> <code>.value_counts()</code></h3>
 <p>The <code>Series.value_counts()</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.Series.value_counts.html">(documentation)</a> method counts the number of occurrences of each unique value in a <code>Series</code>. In other words, it <em>counts</em> the number of times each unique <em>value</em> appears. This is often useful for determining the most or least common entries in a <code>Series</code>.</p>
 <p>In the example below, we can determine the name with the most years in which at least one person has taken that name by counting the number of times each name appears in the <code>"Name"</code> column of <code>babynames</code>. Note that the return value is also a <code>Series</code>.</p>
-<div id="bc6910cb" class="cell" data-execution_count="31">
+<div id="4beaa8bb" class="cell" data-execution_count="31">
 <div class="sourceCode cell-code" id="cb41"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Name"</span>].value_counts().head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="31">
 <pre><code>Name
@@ -1763,7 +1763,7 @@ <h3 data-number="3.3.5" class="anchored" data-anchor-id="value_counts"><span cla
 <section id="unique" class="level3" data-number="3.3.6">
 <h3 data-number="3.3.6" class="anchored" data-anchor-id="unique"><span class="header-section-number">3.3.6</span> <code>.unique()</code></h3>
 <p>If we have a <code>Series</code> with many repeated values, then <code>.unique()</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.unique.html">(documentation)</a> can be used to identify only the <em>unique</em> values. Here we return an array of all the names in <code>babynames</code>.</p>
-<div id="a41eb330" class="cell" data-execution_count="32">
+<div id="c540df7c" class="cell" data-execution_count="32">
 <div class="sourceCode cell-code" id="cb43"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Name"</span>].unique()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="32">
 <pre><code>array(['Mary', 'Helen', 'Dorothy', ..., 'Zae', 'Zai', 'Zayvier'],
@@ -1774,7 +1774,7 @@ <h3 data-number="3.3.6" class="anchored" data-anchor-id="unique"><span class="he
 <section id="sort_values" class="level3" data-number="3.3.7">
 <h3 data-number="3.3.7" class="anchored" data-anchor-id="sort_values"><span class="header-section-number">3.3.7</span> <code>.sort_values()</code></h3>
 <p>Ordering a <code>DataFrame</code> can be useful for isolating extreme values. For example, the first 5 entries of a column sorted in descending order (that is, from highest to lowest) are the largest 5 values. <code>.sort_values</code> <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html">(documentation)</a> allows us to order a <code>DataFrame</code> or <code>Series</code> by a specified column. We can choose to either receive the rows in <code>ascending</code> order (default) or <code>descending</code> order.</p>
-<div id="35d41af8" class="cell" data-execution_count="33">
+<div id="9ed4e6d7" class="cell" data-execution_count="33">
 <div class="sourceCode cell-code" id="cb45"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Sort the "Count" column from highest to lowest</span></span>
 <span id="cb45-2"><a href="#cb45-2" aria-hidden="true" tabindex="-1"></a>babynames.sort_values(by<span class="op">=</span><span class="st">"Count"</span>, ascending<span class="op">=</span><span class="va">False</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="33">
@@ -1840,7 +1840,7 @@ <h3 data-number="3.3.7" class="anchored" data-anchor-id="sort_values"><span clas
 </div>
 </div>
 <p>Unlike when calling <code>.sort_values()</code> on a <code>DataFrame</code>, we do not need to explicitly specify the column used for sorting when calling <code>.sort_values()</code> on a <code>Series</code>. We can still specify the ordering paradigm – that is, whether values are sorted in ascending or descending order.</p>
-<div id="9a37830b" class="cell" data-execution_count="34">
+<div id="79e0e300" class="cell" data-execution_count="34">
 <div class="sourceCode cell-code" id="cb46"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Sort the "Name" Series alphabetically</span></span>
 <span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"Name"</span>].sort_values(ascending<span class="op">=</span><span class="va">True</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="34">
diff --git a/docs/pandas_3/pandas_3.html b/docs/pandas_3/pandas_3.html
index a8f3cb93..6ba53ac4 100644
--- a/docs/pandas_3/pandas_3.html
+++ b/docs/pandas_3/pandas_3.html
@@ -347,7 +347,7 @@ <h2 id="toc-title">Pandas III</h2>
 <h2 data-number="4.1" class="anchored" data-anchor-id="custom-sorts"><span class="header-section-number">4.1</span> Custom Sorts</h2>
 <p>First, let’s finish our discussion about sorting. Let’s try to solve a sorting problem using different approaches. Assume we want to find the longest baby names and sort our data accordingly.</p>
 <p>We’ll start by loading the <code>babynames</code> dataset. Note that this dataset is filtered to only contain data from California.</p>
-<div id="380585a4" class="cell" data-execution_count="1">
+<div id="277bfbfd" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This code pulls census data and loads it into a DataFrame</span></span>
@@ -478,7 +478,7 @@ <h2 data-number="4.1" class="anchored" data-anchor-id="custom-sorts"><span class
 <section id="approach-1-create-a-temporary-column" class="level3" data-number="4.1.1">
 <h3 data-number="4.1.1" class="anchored" data-anchor-id="approach-1-create-a-temporary-column"><span class="header-section-number">4.1.1</span> Approach 1: Create a Temporary Column</h3>
 <p>One method to do this is to first start by creating a column that contains the lengths of the names.</p>
-<div id="3e1b247e" class="cell" data-execution_count="2">
+<div id="404d0ba8" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a Series of the length of each name</span></span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>babyname_lengths <span class="op">=</span> babynames[<span class="st">"Name"</span>].<span class="bu">str</span>.<span class="bu">len</span>()</span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -554,7 +554,7 @@ <h3 data-number="4.1.1" class="anchored" data-anchor-id="approach-1-create-a-tem
 </div>
 </div>
 <p>We can then sort the <code>DataFrame</code> by that column using <code>.sort_values()</code>:</p>
-<div id="5c5f010e" class="cell" data-execution_count="3">
+<div id="9cb5e5ff" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Sort by the temporary column</span></span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>babynames <span class="op">=</span> babynames.sort_values(by<span class="op">=</span><span class="st">"name_lengths"</span>, ascending<span class="op">=</span><span class="va">False</span>)</span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>babynames.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -627,7 +627,7 @@ <h3 data-number="4.1.1" class="anchored" data-anchor-id="approach-1-create-a-tem
 </div>
 </div>
 <p>Finally, we can drop the <code>name_lengths</code> column from <code>babynames</code> to prevent our table from getting cluttered.</p>
-<div id="dfecb1e9" class="cell" data-execution_count="4">
+<div id="95cc1e5c" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Drop the 'name_length' column</span></span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>babynames <span class="op">=</span> babynames.drop(<span class="st">"name_lengths"</span>, axis<span class="op">=</span><span class="st">'columns'</span>)</span>
 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>babynames.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -697,7 +697,7 @@ <h3 data-number="4.1.1" class="anchored" data-anchor-id="approach-1-create-a-tem
 <section id="approach-2-sorting-using-the-key-argument" class="level3" data-number="4.1.2">
 <h3 data-number="4.1.2" class="anchored" data-anchor-id="approach-2-sorting-using-the-key-argument"><span class="header-section-number">4.1.2</span> Approach 2: Sorting using the <code>key</code> Argument</h3>
 <p>Another way to approach this is to use the <code>key</code> argument of <code>.sort_values()</code>. Here we can specify that we want to sort <code>"Name"</code> values by their length.</p>
-<div id="b5448b44" class="cell" data-execution_count="5">
+<div id="3a2bf112" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>babynames.sort_values(<span class="st">"Name"</span>, key<span class="op">=</span><span class="kw">lambda</span> x: x.<span class="bu">str</span>.<span class="bu">len</span>(), ascending<span class="op">=</span><span class="va">False</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="5">
 <div>
@@ -765,7 +765,7 @@ <h3 data-number="4.1.2" class="anchored" data-anchor-id="approach-2-sorting-usin
 <section id="approach-3-sorting-using-the-map-function" class="level3" data-number="4.1.3">
 <h3 data-number="4.1.3" class="anchored" data-anchor-id="approach-3-sorting-using-the-map-function"><span class="header-section-number">4.1.3</span> Approach 3: Sorting using the <code>map</code> Function</h3>
 <p>We can also use the <code>map</code> function on a <code>Series</code> to solve this. Say we want to sort the <code>babynames</code> table by the number of <code>"dr"</code>’s and <code>"ea"</code>’s in each <code>"Name"</code>. We’ll define the function <code>dr_ea_count</code> to help us out.</p>
-<div id="3cee9cdc" class="cell" data-execution_count="6">
+<div id="e8a17114" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># First, define a function to count the number of times "dr" or "ea" appear in each name</span></span>
 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> dr_ea_count(string):</span>
 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> string.count(<span class="st">'dr'</span>) <span class="op">+</span> string.count(<span class="st">'ea'</span>)</span>
@@ -845,7 +845,7 @@ <h3 data-number="4.1.3" class="anchored" data-anchor-id="approach-3-sorting-usin
 </div>
 </div>
 <p>We can drop the <code>dr_ea_count</code> once we’re done using it to maintain a neat table.</p>
-<div id="8033dbf8" class="cell" data-execution_count="7">
+<div id="6687638f" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Drop the `dr_ea_count` column</span></span>
 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>babynames <span class="op">=</span> babynames.drop(<span class="st">"dr_ea_count"</span>, axis <span class="op">=</span> <span class="st">'columns'</span>)</span>
 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>babynames.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -917,10 +917,10 @@ <h3 data-number="4.1.3" class="anchored" data-anchor-id="approach-3-sorting-usin
 <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.groupby"><span class="header-section-number">4.2</span> Aggregating Data with <code>.groupby</code></h2>
 <p>Up until this point, we have been working with individual rows of <code>DataFrame</code>s. As data scientists, we often wish to investigate trends across a larger <em>subset</em> of our data. For example, we may want to compute some summary statistic (the mean, median, sum, etc.) for a group of rows in our <code>DataFrame</code>. To do this, we’ll use <code>pandas</code> <code>GroupBy</code> objects. Our goal is to group together rows that fall under the same category and perform an operation that aggregates across all rows in the category.</p>
 <p>Let’s say we wanted to aggregate all rows in <code>babynames</code> for a given year.</p>
-<div id="059613ee" class="cell" data-execution_count="8">
+<div id="ae3063c3" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Year"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="8">
-<pre><code>&lt;pandas.core.groupby.generic.DataFrameGroupBy object at 0x1257c93d0&gt;</code></pre>
+<pre><code>&lt;pandas.core.groupby.generic.DataFrameGroupBy object at 0x10f8d1cd0&gt;</code></pre>
 </div>
 </div>
 <p>What does this strange output mean? Calling <code>.groupby</code> <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html">(documentation)</a> has generated a <code>GroupBy</code> object. You can imagine this as a set of “mini” sub-<code>DataFrame</code>s, where each subframe contains all of the rows from <code>babynames</code> that correspond to a particular year.</p>
@@ -930,7 +930,7 @@ <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.gr
 </center>
 <p>We can’t work with a <code>GroupBy</code> object directly – that is why you saw that strange output earlier rather than a standard view of a <code>DataFrame</code>. To actually manipulate values within these “mini” <code>DataFrame</code>s, we’ll need to call an <em>aggregation method</em>. This is a method that tells <code>pandas</code> how to aggregate the values within the <code>GroupBy</code> object. Once the aggregation is applied, <code>pandas</code> will return a normal (now grouped) <code>DataFrame</code>.</p>
 <p>The first aggregation method we’ll consider is <code>.agg</code>. The <code>.agg</code> method takes in a function as its argument; this function is then applied to each column of a “mini” grouped DataFrame. We end up with a new <code>DataFrame</code> with one aggregated row per subframe. Let’s see this in action by finding the <code>sum</code> of all counts for each year in <code>babynames</code> – this is equivalent to finding the number of babies born in each year.</p>
-<div id="01d4006e" class="cell" data-execution_count="9">
+<div id="2fadc47b" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>babynames[[<span class="st">"Year"</span>, <span class="st">"Count"</span>]].groupby(<span class="st">"Year"</span>).agg(<span class="st">"sum"</span>).head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
 <div>
@@ -983,7 +983,7 @@ <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.gr
 </div>
 <p>Calling <code>.agg</code> has condensed each subframe back into a single row. This gives us our final output: a <code>DataFrame</code> that is now indexed by <code>"Year"</code>, with a single row for each unique year in the original <code>babynames</code> DataFrame.</p>
 <p>There are many different aggregation functions we can use, all of which are useful in different applications.</p>
-<div id="97e8724e" class="cell" data-execution_count="10">
+<div id="c748cf76" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>babynames[[<span class="st">"Year"</span>, <span class="st">"Count"</span>]].groupby(<span class="st">"Year"</span>).agg(<span class="st">"min"</span>).head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="10">
 <div>
@@ -1027,7 +1027,7 @@ <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.gr
 </div>
 </div>
 </div>
-<div id="e0648181" class="cell" data-execution_count="11">
+<div id="cded7213" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>babynames[[<span class="st">"Year"</span>, <span class="st">"Count"</span>]].groupby(<span class="st">"Year"</span>).agg(<span class="st">"max"</span>).head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="11">
 <div>
@@ -1071,7 +1071,7 @@ <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.gr
 </div>
 </div>
 </div>
-<div id="30cbd950" class="cell" data-execution_count="12">
+<div id="c3562ea4" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Same result, but now we explicitly tell pandas to only consider the "Count" column when summing</span></span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Year"</span>)[[<span class="st">"Count"</span>]].agg(<span class="st">"sum"</span>).head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="12">
@@ -1125,7 +1125,7 @@ <h2 data-number="4.2" class="anchored" data-anchor-id="aggregating-data-with-.gr
 <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions"><span class="header-section-number">4.2.1</span> Aggregation Functions</h3>
 <p>Because of this fairly broad requirement, <code>pandas</code> offers many ways of computing an aggregation.</p>
 <p><strong>In-built</strong> Python operations – such as <code>sum</code>, <code>max</code>, and <code>min</code> – are automatically recognized by <code>pandas</code>.</p>
-<div id="f672740f" class="cell" data-execution_count="13">
+<div id="14216461" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co"># What is the minimum count for each name in any year?</span></span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Name"</span>)[[<span class="st">"Count"</span>]].agg(<span class="st">"min"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="13">
@@ -1170,7 +1170,7 @@ <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions">
 </div>
 </div>
 </div>
-<div id="0fda5ad8" class="cell" data-execution_count="14">
+<div id="cb06c7ec" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="co"># What is the largest single-year count of each name?</span></span>
 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Name"</span>)[[<span class="st">"Count"</span>]].agg(<span class="st">"max"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="14">
@@ -1216,7 +1216,7 @@ <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions">
 </div>
 </div>
 <p>As mentioned previously, functions from the <code>NumPy</code> library, such as <code>np.mean</code>, <code>np.max</code>, <code>np.min</code>, and <code>np.sum</code>, are also fair game in <code>pandas</code>.</p>
-<div id="28502a2f" class="cell" data-execution_count="15">
+<div id="14bf8a0b" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># What is the average count for each name across all years?</span></span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Name"</span>)[[<span class="st">"Count"</span>]].agg(<span class="st">"mean"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="15">
@@ -1272,7 +1272,7 @@ <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions">
 </ul>
 <p>The latter two entries in this list – <code>"first"</code> and <code>"last"</code> – are unique to <code>pandas</code>. They return the first or last entry in a subframe column. Why might this be useful? Consider a case where <em>multiple</em> columns in a group share identical information. To represent this information in the grouped output, we can simply grab the first or last entry, which we know will be identical to all other entries.</p>
 <p>Let’s illustrate this with an example. Say we add a new column to <code>babynames</code> that contains the first letter of each name.</p>
-<div id="cb1c7829" class="cell" data-execution_count="16">
+<div id="24d1c3b1" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Imagine we had an additional column, "First Letter". We'll explain this code next week</span></span>
 <span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>babynames[<span class="st">"First Letter"</span>] <span class="op">=</span> babynames[<span class="st">"Name"</span>].<span class="bu">str</span>[<span class="dv">0</span>]</span>
 <span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1337,7 +1337,7 @@ <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions">
 <figcaption>Aggregating using “first”</figcaption>
 </figure>
 </div>
-<div id="2ef57608" class="cell" data-execution_count="17">
+<div id="12ab03d3" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>babynames_new.groupby(<span class="st">"Name"</span>).agg({<span class="st">"First Letter"</span>:<span class="st">"first"</span>, <span class="st">"Year"</span>:<span class="st">"max"</span>}).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="17">
 <div>
@@ -1392,7 +1392,7 @@ <h3 data-number="4.2.1" class="anchored" data-anchor-id="aggregation-functions">
 <section id="plotting-birth-counts" class="level3" data-number="4.2.2">
 <h3 data-number="4.2.2" class="anchored" data-anchor-id="plotting-birth-counts"><span class="header-section-number">4.2.2</span> Plotting Birth Counts</h3>
 <p>Let’s use <code>.agg</code> to find the total number of babies born in each year. Recall that using <code>.agg</code> with <code>.groupby()</code> follows the format: <code>df.groupby(column_name).agg(aggregation_function)</code>. The line of code below gives us the total number of babies born in each year.</p>
-<div id="6f6f952f" class="cell" data-execution_count="18">
+<div id="b29ebdff" class="cell" data-execution_count="18">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Year"</span>)[[<span class="st">"Count"</span>]].agg(<span class="bu">sum</span>).head(<span class="dv">5</span>)</span>
@@ -1402,7 +1402,7 @@ <h3 data-number="4.2.2" class="anchored" data-anchor-id="plotting-birth-counts">
 <span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="co"># babynames.groupby("Year").sum(numeric_only=True)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-stderr">
-<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/390646742.py:1: FutureWarning:
+<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/390646742.py:1: FutureWarning:
 
 The provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "sum" instead.
 </code></pre>
@@ -1452,7 +1452,7 @@ <h3 data-number="4.2.2" class="anchored" data-anchor-id="plotting-birth-counts">
 <p>Here’s an illustration of the process:</p>
 <p><img src="images/aggregation.png" alt="aggregation" width="600"></p>
 <p>Plotting the <code>DataFrame</code> we obtain tells an interesting story.</p>
-<div id="eed506bd" class="cell" data-execution_count="19">
+<div id="b29c57b0" class="cell" data-execution_count="19">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.express <span class="im">as</span> px</span>
@@ -1460,9 +1460,9 @@ <h3 data-number="4.2.2" class="anchored" data-anchor-id="plotting-birth-counts">
 <span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a>px.line(puzzle2, y <span class="op">=</span> <span class="st">"Count"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="9c047ff3-fe5d-43bb-b609-287acc49ef9c" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("9c047ff3-fe5d-43bb-b609-287acc49ef9c")) {                    Plotly.newPlot(                        "9c047ff3-fe5d-43bb-b609-287acc49ef9c",                        [{"hovertemplate":"Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[9163,9983,17946,22094,26926,35835,37501,39916,44692,45119,54142,58983,61004,67917,74451,73493,72910,74201,74264,72108,75294,71467,69522,66895,69789,71603,74932,83738,91626,93461,102627,114296,142033,159813,164349,171764,204945,232313,229033,233625,235582,250468,271681,287484,297099,304567,324186,340083,337562,345901,358544,363926,360475,361897,355386,336567,319421,318819,321040,333671,342411,310020,287239,275036,286947,290518,302547,315011,322241,343070,365973,382156,390581,394608,404961,425583,435964,453824,480602,512615,552647,549317,541054,524983,509302,494635,483288,468412,464300,460844,471649,466934,467742,477651,480892,484503,494971,497627,483360,460305,4446
19,437818,439402,431945,440683,431317,427015,411058,395436,386996,362882,362582,360023],"yaxis":"y","type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333
333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"wh
ite","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d922
1"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="fbaaeece-9d27-409a-a2a8-379e1d0d5f43" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("fbaaeece-9d27-409a-a2a8-379e1d0d5f43")) {                    Plotly.newPlot(                        "fbaaeece-9d27-409a-a2a8-379e1d0d5f43",                        [{"hovertemplate":"Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[9163,9983,17946,22094,26926,35835,37501,39916,44692,45119,54142,58983,61004,67917,74451,73493,72910,74201,74264,72108,75294,71467,69522,66895,69789,71603,74932,83738,91626,93461,102627,114296,142033,159813,164349,171764,204945,232313,229033,233625,235582,250468,271681,287484,297099,304567,324186,340083,337562,345901,358544,363926,360475,361897,355386,336567,319421,318819,321040,333671,342411,310020,287239,275036,286947,290518,302547,315011,322241,343070,365973,382156,390581,394608,404961,425583,435964,453824,480602,512615,552647,549317,541054,524983,509302,494635,483288,468412,464300,460844,471649,466934,467742,477651,480892,484503,494971,497627,483360,460305,4446
19,437818,439402,431945,440683,431317,427015,411058,395436,386996,362882,362582,360023],"yaxis":"y","type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333
333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"wh
ite","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d922
1"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('9c047ff3-fe5d-43bb-b609-287acc49ef9c');
+var gd = document.getElementById('fbaaeece-9d27-409a-a2a8-379e1d0d5f43');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -1506,7 +1506,7 @@ <h3 data-number="4.2.4" class="anchored" data-anchor-id="revisiting-the-.agg-fun
 <pre><code>babynames.groupby("Year").mean().head()</code></pre>
 <p>We can now put this all into practice. Say we want to find the baby name with sex “F” that has fallen in popularity the most in California. To calculate this, we can first create a metric: “Ratio to Peak” (RTP). The RTP is the ratio of babies born with a given name in 2022 to the <em>maximum</em> number of babies born with the name in <em>any</em> year.</p>
 <p>Let’s start by calculating this for one name, “Jennifer”.</p>
-<div id="010e90cd" class="cell" data-execution_count="20">
+<div id="2096d9c8" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We filter by babies with sex "F" and sort by "Year"</span></span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>f_babynames <span class="op">=</span> babynames[babynames[<span class="st">"Sex"</span>] <span class="op">==</span> <span class="st">"F"</span>]</span>
 <span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a>f_babynames <span class="op">=</span> f_babynames.sort_values([<span class="st">"Year"</span>])</span>
@@ -1525,7 +1525,7 @@ <h3 data-number="4.2.4" class="anchored" data-anchor-id="revisiting-the-.agg-fun
 </div>
 </div>
 <p>By creating a function to calculate RTP and applying it to our <code>DataFrame</code> by using <code>.groupby()</code>, we can easily compute the RTP for all names at once!</p>
-<div id="f2382f3e" class="cell" data-execution_count="21">
+<div id="905ca35b" class="cell" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> ratio_to_peak(series):</span>
 <span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> series.iloc[<span class="op">-</span><span class="dv">1</span>] <span class="op">/</span> <span class="bu">max</span>(series)</span>
 <span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1593,7 +1593,7 @@ <h3 data-number="4.2.5" class="anchored" data-anchor-id="nuisance-columns"><span
 <section id="renaming-columns-after-grouping" class="level3" data-number="4.2.6">
 <h3 data-number="4.2.6" class="anchored" data-anchor-id="renaming-columns-after-grouping"><span class="header-section-number">4.2.6</span> Renaming Columns After Grouping</h3>
 <p>By default, <code>.groupby</code> will not rename any aggregated columns. As we can see in the table above, the aggregated column is still named <code>Count</code> even though it now represents the RTP. For better readability, we can rename <code>Count</code> to <code>Count RTP</code>.</p>
-<div id="1f815d26" class="cell" data-execution_count="22">
+<div id="0509f671" class="cell" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>rtp_table <span class="op">=</span> rtp_table.rename(columns <span class="op">=</span> {<span class="st">"Count"</span>: <span class="st">"Count RTP"</span>})</span>
 <span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a>rtp_table</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="22">
@@ -1680,7 +1680,7 @@ <h3 data-number="4.2.6" class="anchored" data-anchor-id="renaming-columns-after-
 <section id="some-data-science-payoff" class="level3" data-number="4.2.7">
 <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payoff"><span class="header-section-number">4.2.7</span> Some Data Science Payoff</h3>
 <p>By sorting <code>rtp_table</code>, we can see the names whose popularity has decreased the most.</p>
-<div id="da4642b2" class="cell" data-execution_count="23">
+<div id="22f74201" class="cell" data-execution_count="23">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>rtp_table <span class="op">=</span> rtp_table.rename(columns <span class="op">=</span> {<span class="st">"Count"</span>: <span class="st">"Count RTP"</span>})</span>
 <span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a>rtp_table.sort_values(<span class="st">"Count RTP"</span>).head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="23">
@@ -1733,16 +1733,16 @@ <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payof
 </div>
 </div>
 <p>To visualize the above <code>DataFrame</code>, let’s look at the line plot below:</p>
-<div id="90e55205" class="cell" data-execution_count="24">
+<div id="1f94ace3" class="cell" data-execution_count="24">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb28"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> plotly.express <span class="im">as</span> px</span>
 <span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a>px.line(f_babynames[f_babynames[<span class="st">"Name"</span>] <span class="op">==</span> <span class="st">"Debra"</span>], x <span class="op">=</span> <span class="st">"Year"</span>, y <span class="op">=</span> <span class="st">"Count"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 <div class="cell-output cell-output-display">
-<div>                            <div id="007e89dc-9b41-4a6c-9091-da8d1e1fd26a" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("007e89dc-9b41-4a6c-9091-da8d1e1fd26a")) {                    Plotly.newPlot(                        "007e89dc-9b41-4a6c-9091-da8d1e1fd26a",                        [{"hovertemplate":"Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2012,2013,2016],"xaxis":"x","y":[7,7,8,15,19,20,56,92,199,601,1510,2351,3295,3784,3969,3755,3318,2660,2290,2014,1647,1592,1430,1287,1154,958,818,748,647,547,463,318,242,236,159,151,151,164,130,141,97,114,97,95,93,64,78,69,71,51,62,41,34,28,28,12,14,16,10,13,14,10,7,12,13,12,13,6,7,5,8,5],"yaxis":"y","type":"scatter"}],                        
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="949e9fff-4e8d-4143-ab28-5f941b2e4f39" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("949e9fff-4e8d-4143-ab28-5f941b2e4f39")) {                    Plotly.newPlot(                        "949e9fff-4e8d-4143-ab28-5f941b2e4f39",                        [{"hovertemplate":"Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"","orientation":"v","showlegend":false,"x":[1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2012,2013,2016],"xaxis":"x","y":[7,7,8,15,19,20,56,92,199,601,1510,2351,3295,3784,3969,3755,3318,2660,2290,2014,1647,1592,1430,1287,1154,958,818,748,647,547,463,318,242,236,159,151,151,164,130,141,97,114,97,95,93,64,78,69,71,51,62,41,34,28,28,12,14,16,10,13,14,10,7,12,13,12,13,6,7,5,8,5],"yaxis":"y","type":"scatter"}],                        
{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"
],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinec
olor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":tr
ue,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('007e89dc-9b41-4a6c-9091-da8d1e1fd26a');
+var gd = document.getElementById('949e9fff-4e8d-4143-ab28-5f941b2e4f39');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -1768,7 +1768,7 @@ <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payof
 </div>
 </div>
 <p>We can get the list of the top 10 names and then plot popularity with the following code:</p>
-<div id="e7249c30" class="cell" data-execution_count="25">
+<div id="e37b9c38" class="cell" data-execution_count="25">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>top10 <span class="op">=</span> rtp_table.sort_values(<span class="st">"Count RTP"</span>).head(<span class="dv">10</span>).index</span>
 <span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>px.line(</span>
 <span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a>    f_babynames[f_babynames[<span class="st">"Name"</span>].isin(top10)], </span>
@@ -1783,9 +1783,9 @@ <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payof
 </code></pre>
 </div>
 <div class="cell-output cell-output-display">
-<div>                            <div id="14d6564d-a039-4e96-8edf-eb5941a62473" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("14d6564d-a039-4e96-8edf-eb5941a62473")) {                    Plotly.newPlot(                        "14d6564d-a039-4e96-8edf-eb5941a62473",                        [{"hovertemplate":"Name=Carol<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Carol","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Carol","orientation":"v","showlegend":true,"x":[1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[8,13,17,16,26,38,59,47,55,48,64,67,74,94,138,153,151,148,193,279,270,297,367,453,559,669,873,1015,1050,1109,1079,1339,1672,1937,2089,2138,2152,2201,1954,1779,1737,1734,1727,1597,1684,1651,1704,1703,1545,1480,1359,1283,1191,993,1034,815,622,577,543,468,366,267,223,187,173,146,145,145,121,132,123,128,106,114,111,101,120,107,108,134,150,136,129,89,92,75,87,64,61,46,64,33,43,47,52,76,62,38,44,26,17,47,31,36,24,13,25,18,29,20,17,8,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Susan<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Susan","line":{"color":"#EF553B","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name"
:"Susan","orientation":"v","showlegend":true,"x":[1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[6,8,8,10,16,17,15,20,22,21,19,15,22,26,32,29,43,25,37,63,47,63,74,101,118,138,183,271,433,630,795,1058,1380,1596,1991,2689,2831,3338,3180,3260,3346,3424,3753,3934,3900,3771,3631,3504,3123,3145,3135,2952,2839,2535,2008,1825,1644,1367,1232,1070,861,651,530,552,496,456,437,424,409,420,361,391,352,338,273,280,272,286,267,272,260,196,202,172,152,152,114,116,103,100,104,85,76,70,71,74,53,56,41,39,43,28,44,26,45,22,26,22,19,17,8,13],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Tina<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Tina","line":{"color":"#00cc96","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Tina","orientation":"v","showlegend":true,"x":[1915,1916,1917,1918,1920,1921,1922,1924,1925,1927,1928,1929,1930,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[5,6,5,5,5,7,5,9,5,8,8,5,10,10,7,8,12,9,28,45,43,53,64,80,80,88,92,128,168,163,177,366,569,569,700,753,889,1045,1228,1212,1129,1202,1282,1342,1402,1302,1248,1091,941,6
34,642,546,450,370,414,363,335,371,310,268,271,310,238,252,252,208,180,196,163,171,147,121,111,91,80,83,90,80,67,64,63,69,36,37,47,39,39,27,39,28,46,38,33,36,26,21,15,13,6],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Cheryl<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Cheryl","line":{"color":"#ab63fa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Cheryl","orientation":"v","showlegend":true,"x":[1930,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2011,2012,2013,2014,2015,2016,2017,2018,2019,2021,2022],"xaxis":"x","y":[6,8,12,10,16,76,49,42,48,87,377,759,801,1063,1093,1021,916,903,993,955,1058,1465,1639,1715,1833,1832,1639,1624,1565,1420,1295,1207,1051,950,899,751,635,550,428,371,293,271,236,199,178,303,299,272,204,229,164,135,129,130,98,106,88,90,65,55,39,47,38,30,30,19,22,24,14,11,16,17,16,13,21,14,11,15,12,10,12,15,8,10,9,6,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Michele<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Michele","line":{"color":"#FFA15A","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Michele","orientation":"v","showlegend":true,"x":[1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2022],"xaxis":"x","y":[7,8,5,8,18,34,113,132,166,171,172,253,213,335,295,306,401,421,500,498,464,454,470,506,576,763
,766,775,768,796,1037,1033,1111,1016,973,700,702,571,494,484,437,390,381,305,281,223,230,227,200,162,206,146,143,164,137,142,125,104,82,65,52,47,45,38,28,37,27,22,28,16,21,15,15,11,14,7,5,10,6,11,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Debbie<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Debbie","line":{"color":"#19d3f3","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Debbie","orientation":"v","showlegend":true,"x":[1936,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016,2017,2021],"xaxis":"x","y":[5,9,9,10,16,11,32,74,91,115,120,191,233,300,427,697,902,1313,1656,1776,1675,1547,1458,1215,1004,648,504,415,338,279,243,192,145,108,108,92,72,64,87,91,81,65,79,67,74,64,56,71,78,93,85,78,50,61,70,53,46,39,22,28,19,11,16,14,13,8,21,10,11,10,12,8,9,6,5,5,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Terri<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Terri","line":{"color":"#FF6692","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Terri","orientation":"v","showlegend":true,"x":[1938,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2004,2005,2006,2016,2021,2022],"xaxis":"x","y":[6,8,12,26,32,38,65,99,130,132,168,154,236,306,379,542,604,685,839,875,1052,964,937,902,826,737,486,448,398,323,312,263,191,153,120,106,81,59,84,57,44,49,47,53,44,36,37,35,32,34,20,26,29,15,19,22,11,15,12,13,11,14,9,7,6,7,5,5,5,5],"yaxis":"
y","type":"scatter"},{"hovertemplate":"Name=Shannon<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Shannon","line":{"color":"#B6E880","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Shannon","orientation":"v","showlegend":true,"x":[1938,1939,1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[6,9,6,10,14,19,25,16,34,23,34,43,51,59,73,83,111,106,126,129,161,145,206,216,305,409,441,516,587,932,1419,1650,1436,1198,1090,1127,982,1218,1136,1052,991,923,968,969,971,945,872,803,699,642,597,527,493,594,615,531,438,428,366,303,217,199,200,165,133,133,110,90,88,63,42,43,37,41,32,19,31,22,17,14,21,8,8,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Debra<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Debra","line":{"color":"#FF97FF","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Debra","orientation":"v","showlegend":true,"x":[1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2012,2013,2016],"xaxis":"x","y":[7,7,8,15,19,20,56,92,199,601,1510,2351,3295,3784,3969,3755,3318,2660,2290,2014,1647,1592,1430,1287,1154,958,818,748,647,547,463,318,242,236,159,151,151,164,130,141,97,114,97,95,93,64,78,69,71,51,62,41,34,28,28,12,14,16,10,13,14,10,7,12,13,12,13,6,7,5,8,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Tammy<br>Year=%{x}<br>Count=%{y}<extra>
</extra>","legendgroup":"Tammy","line":{"color":"#FECB52","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Tammy","orientation":"v","showlegend":true,"x":[1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019,2022],"xaxis":"x","y":[7,5,10,9,12,13,10,9,9,13,28,14,26,37,368,746,990,1038,1136,1223,1539,1273,1219,1168,1143,1099,977,1013,859,704,544,421,392,328,275,229,227,181,168,157,96,120,102,85,120,88,85,94,77,82,74,61,49,45,45,54,50,47,49,45,44,36,30,24,29,14,16,12,11,9,5,13,9,15,11,7,5],"yaxis":"y","type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,
"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"o
utlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential"
:[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"title":{"te
xt":"Name"},"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
+<div>                            <div id="61f5fd43-78b3-4c7c-a94c-9c66c9681441" class="plotly-graph-div" style="height:525px; width:100%;"></div>            <script type="text/javascript">                require(["plotly"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("61f5fd43-78b3-4c7c-a94c-9c66c9681441")) {                    Plotly.newPlot(                        "61f5fd43-78b3-4c7c-a94c-9c66c9681441",                        [{"hovertemplate":"Name=Carol<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Carol","line":{"color":"#636efa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Carol","orientation":"v","showlegend":true,"x":[1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[8,13,17,16,26,38,59,47,55,48,64,67,74,94,138,153,151,148,193,279,270,297,367,453,559,669,873,1015,1050,1109,1079,1339,1672,1937,2089,2138,2152,2201,1954,1779,1737,1734,1727,1597,1684,1651,1704,1703,1545,1480,1359,1283,1191,993,1034,815,622,577,543,468,366,267,223,187,173,146,145,145,121,132,123,128,106,114,111,101,120,107,108,134,150,136,129,89,92,75,87,64,61,46,64,33,43,47,52,76,62,38,44,26,17,47,31,36,24,13,25,18,29,20,17,8,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Susan<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Susan","line":{"color":"#EF553B","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name"
:"Susan","orientation":"v","showlegend":true,"x":[1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[6,8,8,10,16,17,15,20,22,21,19,15,22,26,32,29,43,25,37,63,47,63,74,101,118,138,183,271,433,630,795,1058,1380,1596,1991,2689,2831,3338,3180,3260,3346,3424,3753,3934,3900,3771,3631,3504,3123,3145,3135,2952,2839,2535,2008,1825,1644,1367,1232,1070,861,651,530,552,496,456,437,424,409,420,361,391,352,338,273,280,272,286,267,272,260,196,202,172,152,152,114,116,103,100,104,85,76,70,71,74,53,56,41,39,43,28,44,26,45,22,26,22,19,17,8,13],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Tina<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Tina","line":{"color":"#00cc96","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Tina","orientation":"v","showlegend":true,"x":[1915,1916,1917,1918,1920,1921,1922,1924,1925,1927,1928,1929,1930,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[5,6,5,5,5,7,5,9,5,8,8,5,10,10,7,8,12,9,28,45,43,53,64,80,80,88,92,128,168,163,177,366,569,569,700,753,889,1045,1228,1212,1129,1202,1282,1342,1402,1302,1248,1091,941,6
34,642,546,450,370,414,363,335,371,310,268,271,310,238,252,252,208,180,196,163,171,147,121,111,91,80,83,90,80,67,64,63,69,36,37,47,39,39,27,39,28,46,38,33,36,26,21,15,13,6],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Cheryl<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Cheryl","line":{"color":"#ab63fa","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Cheryl","orientation":"v","showlegend":true,"x":[1930,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2011,2012,2013,2014,2015,2016,2017,2018,2019,2021,2022],"xaxis":"x","y":[6,8,12,10,16,76,49,42,48,87,377,759,801,1063,1093,1021,916,903,993,955,1058,1465,1639,1715,1833,1832,1639,1624,1565,1420,1295,1207,1051,950,899,751,635,550,428,371,293,271,236,199,178,303,299,272,204,229,164,135,129,130,98,106,88,90,65,55,39,47,38,30,30,19,22,24,14,11,16,17,16,13,21,14,11,15,12,10,12,15,8,10,9,6,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Michele<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Michele","line":{"color":"#FFA15A","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Michele","orientation":"v","showlegend":true,"x":[1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2022],"xaxis":"x","y":[7,8,5,8,18,34,113,132,166,171,172,253,213,335,295,306,401,421,500,498,464,454,470,506,576,763
,766,775,768,796,1037,1033,1111,1016,973,700,702,571,494,484,437,390,381,305,281,223,230,227,200,162,206,146,143,164,137,142,125,104,82,65,52,47,45,38,28,37,27,22,28,16,21,15,15,11,14,7,5,10,6,11,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Debbie<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Debbie","line":{"color":"#19d3f3","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Debbie","orientation":"v","showlegend":true,"x":[1936,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016,2017,2021],"xaxis":"x","y":[5,9,9,10,16,11,32,74,91,115,120,191,233,300,427,697,902,1313,1656,1776,1675,1547,1458,1215,1004,648,504,415,338,279,243,192,145,108,108,92,72,64,87,91,81,65,79,67,74,64,56,71,78,93,85,78,50,61,70,53,46,39,22,28,19,11,16,14,13,8,21,10,11,10,12,8,9,6,5,5,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Terri<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Terri","line":{"color":"#FF6692","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Terri","orientation":"v","showlegend":true,"x":[1938,1940,1941,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2004,2005,2006,2016,2021,2022],"xaxis":"x","y":[6,8,12,26,32,38,65,99,130,132,168,154,236,306,379,542,604,685,839,875,1052,964,937,902,826,737,486,448,398,323,312,263,191,153,120,106,81,59,84,57,44,49,47,53,44,36,37,35,32,34,20,26,29,15,19,22,11,15,12,13,11,14,9,7,6,7,5,5,5,5],"yaxis":"
y","type":"scatter"},{"hovertemplate":"Name=Shannon<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Shannon","line":{"color":"#B6E880","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Shannon","orientation":"v","showlegend":true,"x":[1938,1939,1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022],"xaxis":"x","y":[6,9,6,10,14,19,25,16,34,23,34,43,51,59,73,83,111,106,126,129,161,145,206,216,305,409,441,516,587,932,1419,1650,1436,1198,1090,1127,982,1218,1136,1052,991,923,968,969,971,945,872,803,699,642,597,527,493,594,615,531,438,428,366,303,217,199,200,165,133,133,110,90,88,63,42,43,37,41,32,19,31,22,17,14,21,8,8,7],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Debra<br>Year=%{x}<br>Count=%{y}<extra></extra>","legendgroup":"Debra","line":{"color":"#FF97FF","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Debra","orientation":"v","showlegend":true,"x":[1940,1942,1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2012,2013,2016],"xaxis":"x","y":[7,7,8,15,19,20,56,92,199,601,1510,2351,3295,3784,3969,3755,3318,2660,2290,2014,1647,1592,1430,1287,1154,958,818,748,647,547,463,318,242,236,159,151,151,164,130,141,97,114,97,95,93,64,78,69,71,51,62,41,34,28,28,12,14,16,10,13,14,10,7,12,13,12,13,6,7,5,8,5],"yaxis":"y","type":"scatter"},{"hovertemplate":"Name=Tammy<br>Year=%{x}<br>Count=%{y}<extra>
</extra>","legendgroup":"Tammy","line":{"color":"#FECB52","dash":"solid"},"marker":{"symbol":"circle"},"mode":"lines","name":"Tammy","orientation":"v","showlegend":true,"x":[1943,1944,1945,1946,1947,1948,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2019,2022],"xaxis":"x","y":[7,5,10,9,12,13,10,9,9,13,28,14,26,37,368,746,990,1038,1136,1223,1539,1273,1219,1168,1143,1099,977,1013,859,704,544,421,392,328,275,229,227,181,168,157,96,120,102,85,120,88,85,94,77,82,74,61,49,45,45,54,50,47,49,45,44,36,30,24,29,14,16,12,11,9,5,13,9,15,11,7,5],"yaxis":"y","type":"scatter"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,
"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"o
utlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential"
:[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"},"margin":{"b":0,"l":0,"r":0,"t":30}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Year"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Count"}},"legend":{"title":{"te
xt":"Name"},"tracegroupgap":0}},                        {"responsive": true}                    ).then(function(){
                             
-var gd = document.getElementById('14d6564d-a039-4e96-8edf-eb5941a62473');
+var gd = document.getElementById('61f5fd43-78b3-4c7c-a94c-9c66c9681441');
 var x = new MutationObserver(function (mutations, observer) {{
         var display = window.getComputedStyle(gd).display;
         if (!display || display === 'none') {{
@@ -1811,7 +1811,7 @@ <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payof
 </div>
 </div>
 <p>As a quick exercise, consider what code would compute the total number of babies with each name.</p>
-<div id="b1d903a4" class="cell" data-execution_count="26">
+<div id="f52c3f91" class="cell" data-execution_count="26">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb31"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>babynames.groupby(<span class="st">"Name"</span>)[[<span class="st">"Count"</span>]].agg(<span class="st">"sum"</span>).head()</span>
@@ -1865,7 +1865,7 @@ <h3 data-number="4.2.7" class="anchored" data-anchor-id="some-data-science-payof
 <section id="groupby-continued" class="level2" data-number="4.3">
 <h2 data-number="4.3" class="anchored" data-anchor-id="groupby-continued"><span class="header-section-number">4.3</span> <code>.groupby()</code>, Continued</h2>
 <p>We’ll work with the <code>elections</code> <code>DataFrame</code> again.</p>
-<div id="a5538eda" class="cell" data-execution_count="27">
+<div id="8f796a63" class="cell" data-execution_count="27">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -1945,7 +1945,7 @@ <h2 data-number="4.3" class="anchored" data-anchor-id="groupby-continued"><span
 <section id="raw-groupby-objects" class="level3" data-number="4.3.1">
 <h3 data-number="4.3.1" class="anchored" data-anchor-id="raw-groupby-objects"><span class="header-section-number">4.3.1</span> Raw <code>GroupBy</code> Objects</h3>
 <p>The result of <code>groupby</code> applied to a <code>DataFrame</code> is a <code>DataFrameGroupBy</code> object, <strong>not</strong> a <code>DataFrame</code>.</p>
-<div id="6f7aee51" class="cell" data-execution_count="28">
+<div id="ca55e760" class="cell" data-execution_count="28">
 <div class="sourceCode cell-code" id="cb33"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>grouped_by_year <span class="op">=</span> elections.groupby(<span class="st">"Year"</span>)</span>
 <span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a><span class="bu">type</span>(grouped_by_year)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="28">
@@ -1953,14 +1953,14 @@ <h3 data-number="4.3.1" class="anchored" data-anchor-id="raw-groupby-objects"><s
 </div>
 </div>
 <p>There are several ways to look into <code>DataFrameGroupBy</code> objects:</p>
-<div id="8dcf19fc" class="cell" data-execution_count="29">
+<div id="a4ec57f4" class="cell" data-execution_count="29">
 <div class="sourceCode cell-code" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>grouped_by_party <span class="op">=</span> elections.groupby(<span class="st">"Party"</span>)</span>
 <span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>grouped_by_party.groups</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="29">
 <pre><code>{'American': [22, 126], 'American Independent': [115, 119, 124], 'Anti-Masonic': [6], 'Anti-Monopoly': [38], 'Citizens': [127], 'Communist': [89], 'Constitution': [160, 164, 172], 'Constitutional Union': [24], 'Democratic': [2, 4, 8, 10, 13, 14, 17, 20, 28, 29, 34, 37, 39, 45, 47, 52, 55, 57, 64, 70, 74, 77, 81, 83, 86, 91, 94, 97, 100, 105, 108, 111, 114, 116, 118, 123, 129, 134, 137, 140, 144, 151, 158, 162, 168, 176, 178], 'Democratic-Republican': [0, 1], 'Dixiecrat': [103], 'Farmer–Labor': [78], 'Free Soil': [15, 18], 'Green': [149, 155, 156, 165, 170, 177, 181], 'Greenback': [35], 'Independent': [121, 130, 143, 161, 167, 174], 'Liberal Republican': [31], 'Libertarian': [125, 128, 132, 138, 139, 146, 153, 159, 163, 169, 175, 180], 'National Democratic': [50], 'National Republican': [3, 5], 'National Union': [27], 'Natural Law': [148], 'New Alliance': [136], 'Northern Democratic': [26], 'Populist': [48, 61, 141], 'Progressive': [68, 82, 101, 107], 'Prohibition': [41, 44, 49, 51, 54, 59, 63, 67, 73, 75, 99], 'Reform': [150, 154], 'Republican': [21, 23, 30, 32, 33, 36, 40, 43, 46, 53, 56, 60, 65, 69, 72, 79, 80, 84, 87, 90, 96, 98, 104, 106, 109, 112, 113, 117, 120, 122, 131, 133, 135, 142, 145, 152, 157, 166, 171, 173, 179], 'Socialist': [58, 62, 66, 71, 76, 85, 88, 92, 95, 102], 'Southern Democratic': [25], 'States' Rights': [110], 'Taxpayers': [147], 'Union': [93], 'Union Labor': [42], 'Whig': [7, 9, 11, 12, 16, 19]}</code></pre>
 </div>
 </div>
-<div id="6bb62b3a" class="cell" data-execution_count="30">
+<div id="7704d2c3" class="cell" data-execution_count="30">
 <div class="sourceCode cell-code" id="cb37"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a>grouped_by_party.get_group(<span class="st">"Socialist"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="30">
 <div>
@@ -2088,7 +2088,7 @@ <h3 data-number="4.3.2" class="anchored" data-anchor-id="other-groupby-methods">
 <li><a href="https://pandas.pydata.org/docs/reference/api/pandas.core.groupby.DataFrameGroupBy.count.html#pandas.core.groupby.DataFrameGroupBy.count"><code>.count</code></a>: creates a new <strong><code>DataFrame</code></strong> with the number of entries, excluding missing values.</li>
 </ul>
 <p>Let’s illustrate some examples by creating a <code>DataFrame</code> called <code>df</code>.</p>
-<div id="c7b2ea48" class="cell" data-execution_count="31">
+<div id="24c1a0dd" class="cell" data-execution_count="31">
 <div class="sourceCode cell-code" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> pd.DataFrame({<span class="st">'letter'</span>:[<span class="st">'A'</span>,<span class="st">'A'</span>,<span class="st">'B'</span>,<span class="st">'C'</span>,<span class="st">'C'</span>,<span class="st">'C'</span>], </span>
 <span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>                   <span class="st">'num'</span>:[<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>,<span class="dv">4</span>,np.nan,<span class="dv">4</span>], </span>
 <span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a>                   <span class="st">'state'</span>:[np.nan, <span class="st">'tx'</span>, <span class="st">'fl'</span>, <span class="st">'hi'</span>, np.nan, <span class="st">'ak'</span>]})</span>
@@ -2150,7 +2150,7 @@ <h3 data-number="4.3.2" class="anchored" data-anchor-id="other-groupby-methods">
 </div>
 </div>
 <p>Note the slight difference between <code>.size()</code> and <code>.count()</code>: while <code>.size()</code> returns a <code>Series</code> and counts the number of entries including the missing values, <code>.count()</code> returns a <code>DataFrame</code> and counts the number of entries in each column <em>excluding missing values</em>.</p>
-<div id="2bbb598d" class="cell" data-execution_count="32">
+<div id="7692ef2a" class="cell" data-execution_count="32">
 <div class="sourceCode cell-code" id="cb39"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>df.groupby(<span class="st">"letter"</span>).size()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="32">
 <pre><code>letter
@@ -2160,7 +2160,7 @@ <h3 data-number="4.3.2" class="anchored" data-anchor-id="other-groupby-methods">
 dtype: int64</code></pre>
 </div>
 </div>
-<div id="86ac1c0c" class="cell" data-execution_count="33">
+<div id="c11e3cac" class="cell" data-execution_count="33">
 <div class="sourceCode cell-code" id="cb41"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>df.groupby(<span class="st">"letter"</span>).count()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="33">
 <div>
@@ -2202,7 +2202,7 @@ <h3 data-number="4.3.2" class="anchored" data-anchor-id="other-groupby-methods">
 </div>
 </div>
 <p>You might recall that the <code>value_counts()</code> function in the previous note does something similar. It turns out <code>value_counts()</code> and <code>groupby.size()</code> are the same, except <code>value_counts()</code> sorts the resulting <code>Series</code> in descending order automatically.</p>
-<div id="6c8d3204" class="cell" data-execution_count="34">
+<div id="078c8f90" class="cell" data-execution_count="34">
 <div class="sourceCode cell-code" id="cb42"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>df[<span class="st">"letter"</span>].value_counts()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="34">
 <pre><code>letter
@@ -2242,7 +2242,7 @@ <h3 data-number="4.3.3" class="anchored" data-anchor-id="filtering-by-group"><sp
 <li>Return all <code>DataFrame</code> rows that correspond to these years</li>
 </ul>
 <p>For each year, we need to find the maximum <code>%</code> among <em>all</em> rows for that year. If this maximum <code>%</code> is lower than 45%, we will tell <code>pandas</code> to keep all rows corresponding to that year.</p>
-<div id="3a1fb0d9" class="cell" data-execution_count="35">
+<div id="7a7a5b9f" class="cell" data-execution_count="35">
 <div class="sourceCode cell-code" id="cb44"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a>elections.groupby(<span class="st">"Year"</span>).<span class="bu">filter</span>(<span class="kw">lambda</span> sf: sf[<span class="st">"%"</span>].<span class="bu">max</span>() <span class="op">&lt;</span> <span class="dv">45</span>).head(<span class="dv">9</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="35">
 <div>
@@ -2357,10 +2357,10 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 <p>What if we wish to aggregate our <code>DataFrame</code> using a non-standard function – for example, a function of our own design? We can do so by combining <code>.agg</code> with <code>lambda</code> expressions.</p>
 <p>Let’s first consider a puzzle to jog our memory. We will attempt to find the <code>Candidate</code> from each <code>Party</code> with the highest <code>%</code> of votes.</p>
 <p>A naive approach may be to group by the <code>Party</code> column and aggregate by the maximum.</p>
-<div id="8d9821a8" class="cell" data-execution_count="36">
+<div id="27793d46" class="cell" data-execution_count="36">
 <div class="sourceCode cell-code" id="cb45"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a>elections.groupby(<span class="st">"Party"</span>).agg(<span class="bu">max</span>).head(<span class="dv">10</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stderr">
-<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/4278286395.py:1: FutureWarning:
+<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/4278286395.py:1: FutureWarning:
 
 The provided callable &lt;built-in function max&gt; is currently using DataFrameGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "max" instead.
 </code></pre>
@@ -2488,7 +2488,7 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 <li>Group by <code>Party</code> and select the first row of each sub-<code>DataFrame</code></li>
 </ol>
 <p>While it may seem unintuitive, sorting <code>elections</code> by descending order of <code>%</code> is extremely helpful. If we then group by <code>Party</code>, the first row of each <code>GroupBy</code> object will contain information about the <code>Candidate</code> with the highest voter <code>%</code>.</p>
-<div id="6b4b8309" class="cell" data-execution_count="37">
+<div id="91f3330f" class="cell" data-execution_count="37">
 <div class="sourceCode cell-code" id="cb47"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb47-1"><a href="#cb47-1" aria-hidden="true" tabindex="-1"></a>elections_sorted_by_percent <span class="op">=</span> elections.sort_values(<span class="st">"%"</span>, ascending<span class="op">=</span><span class="va">False</span>)</span>
 <span id="cb47-2"><a href="#cb47-2" aria-hidden="true" tabindex="-1"></a>elections_sorted_by_percent.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="37">
@@ -2559,7 +2559,7 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 </div>
 </div>
 </div>
-<div id="82f03918" class="cell" data-execution_count="38">
+<div id="660d15e0" class="cell" data-execution_count="38">
 <div class="sourceCode cell-code" id="cb48"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a>elections_sorted_by_percent.groupby(<span class="st">"Party"</span>).agg(<span class="kw">lambda</span> x : x.iloc[<span class="dv">0</span>]).head(<span class="dv">10</span>)</span>
 <span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb48-3"><a href="#cb48-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Equivalent to the below code</span></span>
@@ -2680,7 +2680,7 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 <p>More generally, <code>lambda</code> functions are used to design custom aggregation functions that aren’t pre-defined by Python. The input parameter <code>x</code> to the <code>lambda</code> function is a <code>GroupBy</code> object. Therefore, it should make sense why <code>lambda x : x.iloc[0]</code> selects the first row in each groupby object.</p>
 <p>In fact, there’s a few different ways to approach this problem. Each approach has different tradeoffs in terms of readability, performance, memory consumption, complexity, etc. We’ve given a few examples below.</p>
 <p><strong>Note</strong>: Understanding these alternative solutions is not required. They are given to demonstrate the vast number of problem-solving approaches in <code>pandas</code>.</p>
-<div id="168ce541" class="cell" data-execution_count="39">
+<div id="223fdddd" class="cell" data-execution_count="39">
 <div class="sourceCode cell-code" id="cb49"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Using the idxmax function</span></span>
 <span id="cb49-2"><a href="#cb49-2" aria-hidden="true" tabindex="-1"></a>best_per_party <span class="op">=</span> elections.loc[elections.groupby(<span class="st">'Party'</span>)[<span class="st">'%'</span>].idxmax()]</span>
 <span id="cb49-3"><a href="#cb49-3" aria-hidden="true" tabindex="-1"></a>best_per_party.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -2752,7 +2752,7 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 </div>
 </div>
 </div>
-<div id="bb0e9fa6" class="cell" data-execution_count="40">
+<div id="1243c41d" class="cell" data-execution_count="40">
 <div class="sourceCode cell-code" id="cb50"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Using the .drop_duplicates function</span></span>
 <span id="cb50-2"><a href="#cb50-2" aria-hidden="true" tabindex="-1"></a>best_per_party2 <span class="op">=</span> elections.sort_values(<span class="st">'%'</span>).drop_duplicates([<span class="st">'Party'</span>], keep<span class="op">=</span><span class="st">'last'</span>)</span>
 <span id="cb50-3"><a href="#cb50-3" aria-hidden="true" tabindex="-1"></a>best_per_party2.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -2830,7 +2830,7 @@ <h3 data-number="4.3.4" class="anchored" data-anchor-id="aggregation-with-lambda
 <h2 data-number="4.4" class="anchored" data-anchor-id="aggregating-data-with-pivot-tables"><span class="header-section-number">4.4</span> Aggregating Data with Pivot Tables</h2>
 <p>We know now that <code>.groupby</code> gives us the ability to group and aggregate data across our <code>DataFrame</code>. The examples above formed groups using just one column in the <code>DataFrame</code>. It’s possible to group by multiple columns at once by passing in a list of column names to <code>.groupby</code>.</p>
 <p>Let’s consider the <code>babynames</code> dataset again. In this problem, we will find the total number of baby names associated with each sex for each year. To do this, we’ll group by <em>both</em> the <code>"Year"</code> and <code>"Sex"</code> columns.</p>
-<div id="bf4b11dc" class="cell" data-execution_count="41">
+<div id="3587d67e" class="cell" data-execution_count="41">
 <div class="sourceCode cell-code" id="cb51"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb51-1"><a href="#cb51-1" aria-hidden="true" tabindex="-1"></a>babynames.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="41">
 <div>
@@ -2900,12 +2900,12 @@ <h2 data-number="4.4" class="anchored" data-anchor-id="aggregating-data-with-piv
 </div>
 </div>
 </div>
-<div id="e370b44e" class="cell" data-execution_count="42">
+<div id="e1470f9e" class="cell" data-execution_count="42">
 <div class="sourceCode cell-code" id="cb52"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Find the total number of baby names associated with each sex for each </span></span>
 <span id="cb52-2"><a href="#cb52-2" aria-hidden="true" tabindex="-1"></a><span class="co"># year in the data</span></span>
 <span id="cb52-3"><a href="#cb52-3" aria-hidden="true" tabindex="-1"></a>babynames.groupby([<span class="st">"Year"</span>, <span class="st">"Sex"</span>])[[<span class="st">"Count"</span>]].agg(<span class="bu">sum</span>).head(<span class="dv">6</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stderr">
-<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/3186035650.py:3: FutureWarning:
+<pre><code>/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/3186035650.py:3: FutureWarning:
 
 The provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "sum" instead.
 </code></pre>
@@ -2967,7 +2967,7 @@ <h2 data-number="4.4" class="anchored" data-anchor-id="aggregating-data-with-piv
 <p>Here’s an illustration of the process:</p>
 <p><img src="images/pivot.png" alt="groupby_demo" width="600"></p>
 <p>The best way to understand pivot tables is to see one in action. Let’s return to our original goal of summing the total number of names associated with each combination of year and sex. We’ll call the <code>pandas</code> <a href="https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.pivot_table.html"><code>.pivot_table</code></a> method to create a new table.</p>
-<div id="b3ef3301" class="cell" data-execution_count="43">
+<div id="a02236a8" class="cell" data-execution_count="43">
 <div class="sourceCode cell-code" id="cb54"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The `pivot_table` method is used to generate a Pandas pivot table</span></span>
 <span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
 <span id="cb54-3"><a href="#cb54-3" aria-hidden="true" tabindex="-1"></a>babynames.pivot_table(</span>
@@ -3034,7 +3034,7 @@ <h2 data-number="4.4" class="anchored" data-anchor-id="aggregating-data-with-piv
 <li><code>aggfunc = np.sum</code> tells <code>pandas</code> what function to use when aggregating the data specified by <code>values</code>. Here, we are summing the name counts for each pair of <code>"Year"</code> and <code>"Sex"</code></li>
 </ul>
 <p>We can even include multiple values in the index or columns of our pivot tables.</p>
-<div id="a1df9756" class="cell" data-execution_count="44">
+<div id="442efe56" class="cell" data-execution_count="44">
 <div class="sourceCode cell-code" id="cb55"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb55-1"><a href="#cb55-1" aria-hidden="true" tabindex="-1"></a>babynames_pivot <span class="op">=</span> babynames.pivot_table(</span>
 <span id="cb55-2"><a href="#cb55-2" aria-hidden="true" tabindex="-1"></a>    index<span class="op">=</span><span class="st">"Year"</span>,     <span class="co"># the rows (turned into index)</span></span>
 <span id="cb55-3"><a href="#cb55-3" aria-hidden="true" tabindex="-1"></a>    columns<span class="op">=</span><span class="st">"Sex"</span>,    <span class="co"># the column values</span></span>
@@ -3123,7 +3123,7 @@ <h2 data-number="4.4" class="anchored" data-anchor-id="aggregating-data-with-piv
 <h2 data-number="4.5" class="anchored" data-anchor-id="joining-tables"><span class="header-section-number">4.5</span> Joining Tables</h2>
 <p>When working on data science projects, we’re unlikely to have absolutely all the data we want contained in a single <code>DataFrame</code> – a real-world data scientist needs to grapple with data coming from multiple sources. If we have access to multiple datasets with related information, we can join two or more tables into a single <code>DataFrame</code>.</p>
 <p>To put this into practice, we’ll revisit the <code>elections</code> dataset.</p>
-<div id="b122df8b" class="cell" data-execution_count="45">
+<div id="7358d460" class="cell" data-execution_count="45">
 <div class="sourceCode cell-code" id="cb56"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>elections.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="45">
 <div>
@@ -3195,7 +3195,7 @@ <h2 data-number="4.5" class="anchored" data-anchor-id="joining-tables"><span cla
 </div>
 <p>Say we want to understand the popularity of the names of each presidential candidate in 2022. To do this, we’ll need the combined data of <code>babynames</code> <em>and</em> <code>elections</code>.</p>
 <p>We’ll start by creating a new column containing the first name of each presidential candidate. This will help us join each name in <code>elections</code> to the corresponding name data in <code>babynames</code>.</p>
-<div id="a52e4886" class="cell" data-execution_count="46">
+<div id="1d085a2b" class="cell" data-execution_count="46">
 <div class="sourceCode cell-code" id="cb57"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a><span class="co"># This `str` operation splits each candidate's full name at each </span></span>
 <span id="cb57-2"><a href="#cb57-2" aria-hidden="true" tabindex="-1"></a><span class="co"># blank space, then takes just the candidate's first name</span></span>
 <span id="cb57-3"><a href="#cb57-3" aria-hidden="true" tabindex="-1"></a>elections[<span class="st">"First Name"</span>] <span class="op">=</span> elections[<span class="st">"Candidate"</span>].<span class="bu">str</span>.split().<span class="bu">str</span>[<span class="dv">0</span>]</span>
@@ -3274,7 +3274,7 @@ <h2 data-number="4.5" class="anchored" data-anchor-id="joining-tables"><span cla
 </div>
 </div>
 </div>
-<div id="a9ef86b3" class="cell" data-execution_count="47">
+<div id="14b3e372" class="cell" data-execution_count="47">
 <div class="sourceCode cell-code" id="cb58"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Here, we'll only consider `babynames` data from 2022</span></span>
 <span id="cb58-2"><a href="#cb58-2" aria-hidden="true" tabindex="-1"></a>babynames_2022 <span class="op">=</span> babynames[babynames[<span class="st">"Year"</span>]<span class="op">==</span><span class="dv">2022</span>]</span>
 <span id="cb58-3"><a href="#cb58-3" aria-hidden="true" tabindex="-1"></a>babynames_2022.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -3347,7 +3347,7 @@ <h2 data-number="4.5" class="anchored" data-anchor-id="joining-tables"><span cla
 </div>
 </div>
 <p>Now, we’re ready to join the two tables. <a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html"><code>pd.merge</code></a> is the <code>pandas</code> method used to join <code>DataFrame</code>s together.</p>
-<div id="4270199b" class="cell" data-execution_count="48">
+<div id="8a3b1aab" class="cell" data-execution_count="48">
 <div class="sourceCode cell-code" id="cb59"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb59-1"><a href="#cb59-1" aria-hidden="true" tabindex="-1"></a>merged <span class="op">=</span> pd.merge(left <span class="op">=</span> elections, right <span class="op">=</span> babynames_2022, <span class="op">\</span></span>
 <span id="cb59-2"><a href="#cb59-2" aria-hidden="true" tabindex="-1"></a>                  left_on <span class="op">=</span> <span class="st">"First Name"</span>, right_on <span class="op">=</span> <span class="st">"Name"</span>)</span>
 <span id="cb59-3"><a href="#cb59-3" aria-hidden="true" tabindex="-1"></a>merged.head()</span>
diff --git a/docs/regex/regex.html b/docs/regex/regex.html
index 74ffb1fb..eee8a4ab 100644
--- a/docs/regex/regex.html
+++ b/docs/regex/regex.html
@@ -439,7 +439,7 @@ <h2 data-number="6.2" class="anchored" data-anchor-id="python-string-methods"><s
 <section id="canonicalization" class="level3" data-number="6.2.1">
 <h3 data-number="6.2.1" class="anchored" data-anchor-id="canonicalization"><span class="header-section-number">6.2.1</span> Canonicalization</h3>
 <p>Assume we want to merge the given tables.</p>
-<div id="04047aa4" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="1">
+<div id="6b62fa88" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -451,7 +451,7 @@ <h3 data-number="6.2.1" class="anchored" data-anchor-id="canonicalization"><span
 <span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>    county_and_pop <span class="op">=</span> pd.read_csv(f)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="5d3cfe03" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="2">
+<div id="3145e359" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>display(county_and_state), display(county_and_pop)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -534,7 +534,7 @@ <h3 data-number="6.2.1" class="anchored" data-anchor-id="canonicalization"><span
 <section id="canonicalization-with-python-string-manipulation" class="level4" data-number="6.2.1.1">
 <h4 data-number="6.2.1.1" class="anchored" data-anchor-id="canonicalization-with-python-string-manipulation"><span class="header-section-number">6.2.1.1</span> Canonicalization with Python String Manipulation</h4>
 <p>The following function uses Python string manipulation to convert a single county name into canonical form. It does so by eliminating whitespace, punctuation, and unnecessary text.</p>
-<div id="5ca29632" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="3">
+<div id="467e3e5c" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> canonicalize_county(county_name):</span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (</span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>        county_name</span>
@@ -552,7 +552,7 @@ <h4 data-number="6.2.1.1" class="anchored" data-anchor-id="canonicalization-with
 </div>
 </div>
 <p>We will use the <code>pandas</code> <code>map</code> function to apply the <code>canonicalize_county</code> function to every row in both <code>DataFrame</code>s. In doing so, we’ll create a new column in each called <code>clean_county_python</code> with the canonical form.</p>
-<div id="8e332c04" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="4">
+<div id="630fcc43" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>county_and_pop[<span class="st">'clean_county_python'</span>] <span class="op">=</span> county_and_pop[<span class="st">'County'</span>].<span class="bu">map</span>(canonicalize_county)</span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>county_and_state[<span class="st">'clean_county_python'</span>] <span class="op">=</span> county_and_state[<span class="st">'County'</span>].<span class="bu">map</span>(canonicalize_county)</span>
 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>display(county_and_state), display(county_and_pop)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -648,7 +648,7 @@ <h4 data-number="6.2.1.1" class="anchored" data-anchor-id="canonicalization-with
 <h4 data-number="6.2.1.2" class="anchored" data-anchor-id="canonicalization-with-pandas-series-methods"><span class="header-section-number">6.2.1.2</span> Canonicalization with Pandas Series Methods</h4>
 <p>Alternatively, we can use <code>pandas</code> <code>Series</code> methods to create this standardized column. To do so, we must call the <code>.str</code> attribute of our <code>Series</code> object prior to calling any methods, like <code>.lower</code> and <code>.replace</code>. Notice how these method names match their equivalent built-in Python string functions.</p>
 <p>Chaining multiple <code>Series</code> methods in this manner eliminates the need to use the <code>map</code> function (as this code is vectorized).</p>
-<div id="c2ec0131" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="5">
+<div id="b12eb678" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> canonicalize_county_series(county_series):</span>
 <span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (</span>
 <span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>        county_series</span>
@@ -766,7 +766,7 @@ <h4 data-number="6.2.1.2" class="anchored" data-anchor-id="canonicalization-with
 <h3 data-number="6.2.2" class="anchored" data-anchor-id="extraction"><span class="header-section-number">6.2.2</span> Extraction</h3>
 <p>Extraction explores the idea of obtaining useful information from text data. This will be particularly important in model building, which we’ll study in a few weeks.</p>
 <p>Say we want to read some data from a <code>.txt</code> file.</p>
-<div id="3b206193" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="6">
+<div id="6547c5a7" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">'data/log.txt'</span>, <span class="st">'r'</span>) <span class="im">as</span> f:</span>
 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>    log_lines <span class="op">=</span> f.readlines()</span>
 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -779,7 +779,7 @@ <h3 data-number="6.2.2" class="anchored" data-anchor-id="extraction"><span class
 </div>
 <p>Suppose we want to extract the day, month, year, hour, minutes, seconds, and time zone. Unfortunately, these items are not in a fixed position from the beginning of the string, so slicing by some fixed offset won’t work.</p>
 <p>Instead, we can use some clever thinking. Notice how the relevant information is contained within a set of brackets, further separated by <code>/</code> and <code>:</code>. We can home in on this region of text, and split the data on these characters. Python’s built-in <code>.split</code> function makes this easy.</p>
-<div id="09ee4c75" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="7">
+<div id="10459cfa" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>first <span class="op">=</span> log_lines[<span class="dv">0</span>] <span class="co"># Only considering the first row of data</span></span>
 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>pertinent <span class="op">=</span> first.split(<span class="st">"["</span>)[<span class="dv">1</span>].split(<span class="st">']'</span>)[<span class="dv">0</span>]</span>
@@ -809,7 +809,7 @@ <h3 data-number="6.2.2" class="anchored" data-anchor-id="extraction"><span class
 <h2 data-number="6.3" class="anchored" data-anchor-id="regex-basics"><span class="header-section-number">6.3</span> RegEx Basics</h2>
 <p>A <strong>regular expression (“RegEx”)</strong> is a sequence of characters that specifies a search pattern. They are written to extract specific information from text. Regular expressions are essentially part of a smaller programming language embedded in Python, made available through the <code>re</code> module. As such, they have a stand-alone syntax and methods for various capabilities.</p>
 <p>Regular expressions are useful in many applications beyond data science. For example, Social Security Numbers (SSNs) are often validated with regular expressions.</p>
-<div id="8e1fdec2" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="8">
+<div id="d45f75ac" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co">r"[0-9]{3}-[0-9]{2}-[0-9]{4}"</span> <span class="co"># Regular Expression Syntax</span></span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="co"># 3 of any digit, then a dash,</span></span>
@@ -1112,7 +1112,7 @@ <h3 data-number="6.5.1" class="anchored" data-anchor-id="greediness"><span class
 <section id="examples-2" class="level3" data-number="6.5.2">
 <h3 data-number="6.5.2" class="anchored" data-anchor-id="examples-2"><span class="header-section-number">6.5.2</span> Examples</h3>
 <p>Let’s revisit our earlier problem of extracting date/time data from the given <code>.txt</code> files. Here is how the data looked.</p>
-<div id="40d83604" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="9">
+<div id="1dae0c3e" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>log_lines[<span class="dv">0</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
 <pre><code>'169.237.46.168 - - [26/Jan/2014:10:47:58 -0800] "GET /stat141/Winter04/ HTTP/1.1" 200 2585 "http://anson.ucdavis.edu/courses/"\n'</code></pre>
@@ -1145,7 +1145,7 @@ <h4 data-number="6.6.1.1" class="anchored" data-anchor-id="canonicalization-with
 <p>The regular expression here removes text surrounded by <code>&lt;&gt;</code> (also known as HTML tags).</p>
 <p>In order, the pattern matches:</p><ol><li>a single <code>&lt;</code></li><li>one or more characters that are not a <code>&gt;</code>: div, td valign…, /td, /div</li><li>a single <code>&gt;</code></li></ol>
 <p>Any substring in <code>text</code> that fulfills all three conditions will be replaced by <code>''</code>.</p>
-<div id="4ac0ccc4" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="10">
+<div id="d6682e96" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> re</span>
 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a>text <span class="op">=</span> <span class="st">"&lt;div&gt;&lt;td valign='top'&gt;Moo&lt;/td&gt;&lt;/div&gt;"</span></span>
@@ -1162,7 +1162,7 @@ <h4 data-number="6.6.1.1" class="anchored" data-anchor-id="canonicalization-with
 <h4 data-number="6.6.1.2" class="anchored" data-anchor-id="canonicalization-with-pandas"><span class="header-section-number">6.6.1.2</span> Canonicalization with <code>pandas</code></h4>
 <p>We can also use regular expressions with <code>pandas</code> <code>Series</code> methods. This gives us the benefit of operating on an entire column of data as opposed to a single value. The code is simple: <br> <code>ser.str.replace(pattern, repl, regex=True)</code>.</p>
 <p>Consider the following <code>DataFrame</code> <code>html_data</code> with a single column.</p>
-<div id="d53ea8f2" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="11">
+<div id="e224803e" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="11">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>data <span class="op">=</span> {<span class="st">"HTML"</span>: [<span class="st">"&lt;div&gt;&lt;td valign='top'&gt;Moo&lt;/td&gt;&lt;/div&gt;"</span>, <span class="op">\</span></span>
@@ -1171,7 +1171,7 @@ <h4 data-number="6.6.1.2" class="anchored" data-anchor-id="canonicalization-with
 <span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a>html_data <span class="op">=</span> pd.DataFrame(data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="846abd75" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="12">
+<div id="635e9517" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>html_data</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="12">
 <div>
@@ -1203,7 +1203,7 @@ <h4 data-number="6.6.1.2" class="anchored" data-anchor-id="canonicalization-with
 </div>
 </div>
 </div>
-<div id="6380d236" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="13">
+<div id="5b02b741" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>pattern <span class="op">=</span> <span class="vs">r"&lt;[^&gt;]+&gt;"</span></span>
 <span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>html_data[<span class="st">'HTML'</span>].<span class="bu">str</span>.replace(pattern, <span class="st">''</span>, regex<span class="op">=</span><span class="va">True</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="13">
@@ -1221,7 +1221,7 @@ <h3 data-number="6.6.2" class="anchored" data-anchor-id="extraction-1"><span cla
 <h4 data-number="6.6.2.1" class="anchored" data-anchor-id="extraction-with-regex"><span class="header-section-number">6.6.2.1</span> Extraction with RegEx</h4>
 <p>Just like with canonicalization, the <code>re</code> module provides capability to extract relevant text from a string: <br> <code>re.findall(pattern, text)</code>. This function returns a list of all matches to <code>pattern</code>.</p>
 <p>Using the familiar regular expression for Social Security Numbers:</p>
-<div id="dd2f8b31" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="14">
+<div id="834d5058" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a>text <span class="op">=</span> <span class="st">"My social security number is 123-45-6789 bro, or maybe it’s 321-45-6789."</span></span>
 <span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>pattern <span class="op">=</span> <span class="vs">r"[0-9]</span><span class="sc">{3}</span><span class="vs">-[0-9]</span><span class="sc">{2}</span><span class="vs">-[0-9]</span><span class="sc">{4}</span><span class="vs">"</span></span>
 <span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a>re.findall(pattern, text)  </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -1234,7 +1234,7 @@ <h4 data-number="6.6.2.1" class="anchored" data-anchor-id="extraction-with-regex
 <h4 data-number="6.6.2.2" class="anchored" data-anchor-id="extraction-with-pandas"><span class="header-section-number">6.6.2.2</span> Extraction with <code>pandas</code></h4>
 <p><code>pandas</code> similarly provides extraction functionality on a <code>Series</code> of data: <code>ser.str.findall(pattern)</code>.</p>
 <p>Consider the following <code>DataFrame</code> <code>ssn_data</code>.</p>
-<div id="81c86fec" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="15">
+<div id="b729f074" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="15">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>data <span class="op">=</span> {<span class="st">"SSN"</span>: [<span class="st">"987-65-4321"</span>, <span class="st">"forty"</span>, <span class="op">\</span></span>
@@ -1243,7 +1243,7 @@ <h4 data-number="6.6.2.2" class="anchored" data-anchor-id="extraction-with-panda
 <span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a>ssn_data <span class="op">=</span> pd.DataFrame(data)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="9f7832f8" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="16">
+<div id="0aab2d83" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>ssn_data</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="16">
 <div>
@@ -1279,7 +1279,7 @@ <h4 data-number="6.6.2.2" class="anchored" data-anchor-id="extraction-with-panda
 </div>
 </div>
 </div>
-<div id="818c09a7" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="17">
+<div id="097ac9bb" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>ssn_data[<span class="st">"SSN"</span>].<span class="bu">str</span>.findall(pattern)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="17">
 <pre><code>0                 [987-65-4321]
@@ -1291,7 +1291,7 @@ <h4 data-number="6.6.2.2" class="anchored" data-anchor-id="extraction-with-panda
 </div>
 <p>This function returns a list for every row containing the pattern matches in a given string.</p>
 <p>As you may expect, there are similar <code>pandas</code> equivalents for other <code>re</code> functions as well. <code>Series.str.extract</code> takes in a pattern and returns a <code>DataFrame</code> of each capture group’s first match in the string. In contrast, <code>Series.str.extractall</code> returns a multi-indexed <code>DataFrame</code> of all matches for each capture group. You can see the difference in the outputs below:</p>
-<div id="a5e920dd" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="18">
+<div id="d448e17e" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a>pattern_cg <span class="op">=</span> <span class="vs">r"([0-9]</span><span class="sc">{3}</span><span class="vs">)-([0-9]</span><span class="sc">{2}</span><span class="vs">)-([0-9]</span><span class="sc">{4}</span><span class="vs">)"</span></span>
 <span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a>ssn_data[<span class="st">"SSN"</span>].<span class="bu">str</span>.extract(pattern_cg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="18">
@@ -1338,7 +1338,7 @@ <h4 data-number="6.6.2.2" class="anchored" data-anchor-id="extraction-with-panda
 </div>
 </div>
 </div>
-<div id="1c9300ab" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="19">
+<div id="528ba1f7" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb28"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>ssn_data[<span class="st">"SSN"</span>].<span class="bu">str</span>.extractall(pattern_cg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="19">
 <div>
@@ -1403,12 +1403,12 @@ <h3 data-number="6.6.3" class="anchored" data-anchor-id="regular-expression-capt
 <p>Let’s take a look at an example.</p>
 <section id="example-1" class="level4" data-number="6.6.3.1">
 <h4 data-number="6.6.3.1" class="anchored" data-anchor-id="example-1"><span class="header-section-number">6.6.3.1</span> Example 1</h4>
-<div id="97d7b974" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="20">
+<div id="2c8eae38" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>text <span class="op">=</span> <span class="st">"Observations: 03:04:53 - Horse awakens. </span><span class="ch">\</span></span>
 <span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a><span class="st">        03:05:14 - Horse goes back to sleep."</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>Say we want to capture all occurences of time data (hour, minute, and second) as <em>separate entities</em>.</p>
-<div id="18d24375" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="21">
+<div id="6b088fed" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb30"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>pattern_1 <span class="op">=</span> <span class="vs">r"(\d\d):(\d\d):(\d\d)"</span></span>
 <span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a>re.findall(pattern_1, text)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="21">
@@ -1417,7 +1417,7 @@ <h4 data-number="6.6.3.1" class="anchored" data-anchor-id="example-1"><span clas
 </div>
 <p>Notice how the given pattern has 3 capture groups, each specified by the regular expression <code>(\d\d)</code>. We then use <code>re.findall</code> to return these capture groups, each as tuples containing 3 matches.</p>
 <p>These regular expression capture groups can be different. We can use the <code>(\d{2})</code> shorthand to extract the same data.</p>
-<div id="36a965bb" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="22">
+<div id="07c2f370" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a>pattern_2 <span class="op">=</span> <span class="vs">r"(\d\d):(\d\d):(\d</span><span class="sc">{2}</span><span class="vs">)"</span></span>
 <span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a>re.findall(pattern_2, text)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="22">
@@ -1428,14 +1428,14 @@ <h4 data-number="6.6.3.1" class="anchored" data-anchor-id="example-1"><span clas
 <section id="example-2" class="level4" data-number="6.6.3.2">
 <h4 data-number="6.6.3.2" class="anchored" data-anchor-id="example-2"><span class="header-section-number">6.6.3.2</span> Example 2</h4>
 <p>With the notion of capture groups, convince yourself how the following regular expression works.</p>
-<div id="9748413d" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="23">
+<div id="faf714f9" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="23">
 <div class="sourceCode cell-code" id="cb34"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>first <span class="op">=</span> log_lines[<span class="dv">0</span>]</span>
 <span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>first</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="23">
 <pre><code>'169.237.46.168 - - [26/Jan/2014:10:47:58 -0800] "GET /stat141/Winter04/ HTTP/1.1" 200 2585 "http://anson.ucdavis.edu/courses/"\n'</code></pre>
 </div>
 </div>
-<div id="d2ebde7a" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="24">
+<div id="ed491a4b" class="cell" data-vscode="{&quot;languageId&quot;:&quot;python&quot;}" data-execution_count="24">
 <div class="sourceCode cell-code" id="cb36"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>pattern <span class="op">=</span> <span class="vs">r'\[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+) (.+)\]'</span></span>
 <span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>day, month, year, hour, minute, second, time_zone <span class="op">=</span> re.findall(pattern, first)[<span class="dv">0</span>]</span>
 <span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(day, month, year, hour, minute, second, time_zone)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
diff --git a/docs/sampling/sampling.html b/docs/sampling/sampling.html
index a9961072..0fb91ede 100644
--- a/docs/sampling/sampling.html
+++ b/docs/sampling/sampling.html
@@ -482,7 +482,7 @@ <h3 data-number="9.3.3" class="anchored" data-anchor-id="demo-barbie-v.-oppenhei
 <li>There are only two movies they can watch on July 21st: Barbie and Oppenheimer.</li>
 <li>Every resident watches a movie (either Barbie or Oppenheimer) on July 21st.</li>
 </ul>
-<div id="06648044" class="cell" data-execution_count="1">
+<div id="acff1f06" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
@@ -496,7 +496,7 @@ <h3 data-number="9.3.3" class="anchored" data-anchor-id="demo-barbie-v.-oppenhei
 <span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>rng <span class="op">=</span> np.random.default_rng()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </details>
 </div>
-<div id="2629d555" class="cell" data-execution_count="2">
+<div id="bc7295cf" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>movie <span class="op">=</span> pd.read_csv(<span class="st">"data/movie.csv"</span>)</span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co"># create a 1/0 int that indicates Barbie vote</span></span>
@@ -559,7 +559,7 @@ <h3 data-number="9.3.3" class="anchored" data-anchor-id="demo-barbie-v.-oppenhei
 </div>
 </div>
 <p>What fraction of Berkeley residents chose Barbie?</p>
-<div id="39b6dfd0" class="cell" data-execution_count="3">
+<div id="c2a055d3" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>actual_barbie <span class="op">=</span> np.mean(movie[<span class="st">"barbie"</span>])</span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>actual_barbie</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="3">
@@ -570,7 +570,7 @@ <h3 data-number="9.3.3" class="anchored" data-anchor-id="demo-barbie-v.-oppenhei
 <section id="convenience-sample-retirees" class="level4" data-number="9.3.3.1">
 <h4 data-number="9.3.3.1" class="anchored" data-anchor-id="convenience-sample-retirees"><span class="header-section-number">9.3.3.1</span> Convenience Sample: Retirees</h4>
 <p>Let’s take a convenience sample of people who have retired (&gt;= 65 years old). What proportion of them went to see Barbie instead of Oppenheimer?</p>
-<div id="f83ade55" class="cell" data-execution_count="4">
+<div id="a71b830c" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>convenience_sample <span class="op">=</span> movie[movie[<span class="st">'age'</span>] <span class="op">&gt;=</span> <span class="dv">65</span>] <span class="co"># take a convenience sample of retirees</span></span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>np.mean(convenience_sample[<span class="st">"barbie"</span>]) <span class="co"># what proportion of them saw Barbie? </span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="4">
@@ -578,14 +578,14 @@ <h4 data-number="9.3.3.1" class="anchored" data-anchor-id="convenience-sample-re
 </div>
 </div>
 <p>Based on this result, we would have predicted that Oppenheimer would win! What happened? Is it possible that our sample is too small or noisy?</p>
-<div id="8b4a228a" class="cell" data-execution_count="5">
+<div id="a46a391c" class="cell" data-execution_count="5">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># what's the size of our sample? </span></span>
 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="bu">len</span>(convenience_sample)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="5">
 <pre><code>359396</code></pre>
 </div>
 </div>
-<div id="6891a867" class="cell" data-execution_count="6">
+<div id="350ea19c" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># what proportion of our data is in the convenience sample? </span></span>
 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="bu">len</span>(convenience_sample)<span class="op">/</span><span class="bu">len</span>(movie)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="6">
@@ -597,7 +597,7 @@ <h4 data-number="9.3.3.1" class="anchored" data-anchor-id="convenience-sample-re
 <section id="check-for-bias" class="level4" data-number="9.3.3.2">
 <h4 data-number="9.3.3.2" class="anchored" data-anchor-id="check-for-bias"><span class="header-section-number">9.3.3.2</span> Check for Bias</h4>
 <p>Let us aggregate all choices by age and visualize the fraction of Barbie views, split by gender.</p>
-<div id="f45a9421" class="cell" data-execution_count="7">
+<div id="0e89eb2a" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>votes_by_barbie <span class="op">=</span> movie.groupby([<span class="st">"age"</span>,<span class="st">"is_male"</span>]).agg(<span class="st">"mean"</span>, numeric_only<span class="op">=</span><span class="va">True</span>).reset_index()</span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>votes_by_barbie.head()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="7">
@@ -650,7 +650,7 @@ <h4 data-number="9.3.3.2" class="anchored" data-anchor-id="check-for-bias"><span
 </div>
 </div>
 </div>
-<div id="3cfd0361" class="cell" data-execution_count="8">
+<div id="d26c3115" class="cell" data-execution_count="8">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co"># A common matplotlib/seaborn pattern: create the figure and axes object, pass ax</span></span>
@@ -681,17 +681,17 @@ <h4 data-number="9.3.3.2" class="anchored" data-anchor-id="check-for-bias"><span
 <section id="simple-random-sample" class="level4" data-number="9.3.3.3">
 <h4 data-number="9.3.3.3" class="anchored" data-anchor-id="simple-random-sample"><span class="header-section-number">9.3.3.3</span> Simple Random Sample</h4>
 <p>Suppose we took a simple random sample (SRS) of the same size as our retiree sample:</p>
-<div id="cda574d9" class="cell" data-execution_count="9">
+<div id="0e146729" class="cell" data-execution_count="9">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="bu">len</span>(convenience_sample)</span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>random_sample <span class="op">=</span> movie.sample(n, replace <span class="op">=</span> <span class="va">False</span>) <span class="co">## By default, replace = False</span></span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>np.mean(random_sample[<span class="st">"barbie"</span>])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="9">
-<pre><code>np.float64(0.5279914078064308)</code></pre>
+<pre><code>np.float64(0.5317171031397122)</code></pre>
 </div>
 </div>
 <p>This is very close to the actual vote of 0.5302792307692308!</p>
 <p>It turns out that we can get similar results with a <strong>much smaller sample size</strong>, say, 800:</p>
-<div id="c6a070b9" class="cell" data-execution_count="10">
+<div id="da2e1766" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">800</span></span>
 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>random_sample <span class="op">=</span> movie.sample(n, replace <span class="op">=</span> <span class="va">False</span>)</span>
 <span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -704,7 +704,7 @@ <h4 data-number="9.3.3.3" class="anchored" data-anchor-id="simple-random-sample"
 <span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a>Markdown(<span class="ss">f"**Actual** = </span><span class="sc">{</span>actual_barbie<span class="sc">:.4f}</span><span class="ss">, **Sample** = </span><span class="sc">{</span>sample_barbie<span class="sc">:.4f}</span><span class="ss">, "</span></span>
 <span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a>         <span class="ss">f"**Err** = </span><span class="sc">{</span><span class="dv">100</span><span class="op">*</span>err<span class="sc">:.2f}</span><span class="ss">%."</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display cell-output-markdown" data-execution_count="10">
-<p><strong>Actual</strong> = 0.5303, <strong>Sample</strong> = 0.5387, <strong>Err</strong> = 1.60%.</p>
+<p><strong>Actual</strong> = 0.5303, <strong>Sample</strong> = 0.5012, <strong>Err</strong> = 5.47%.</p>
 </div>
 </div>
 <p>We’ll learn how to choose this number when we (re)learn the Central Limit Theorem later in the semester.</p>
@@ -713,7 +713,7 @@ <h4 data-number="9.3.3.3" class="anchored" data-anchor-id="simple-random-sample"
 <h4 data-number="9.3.3.4" class="anchored" data-anchor-id="quantifying-chance-error"><span class="header-section-number">9.3.3.4</span> Quantifying Chance Error</h4>
 <p>In our SRS of size 800, what would be our chance error?</p>
 <p>Let’s simulate 1000 versions of taking the 800-sized SRS from before:</p>
-<div id="9c1cc655" class="cell" data-execution_count="11">
+<div id="ba82d4b4" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>nrep <span class="op">=</span> <span class="dv">1000</span>   <span class="co"># number of simulations</span></span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">800</span>       <span class="co"># size of our sample</span></span>
 <span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>poll_result <span class="op">=</span> []</span>
@@ -721,7 +721,7 @@ <h4 data-number="9.3.3.4" class="anchored" data-anchor-id="quantifying-chance-er
 <span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>    random_sample <span class="op">=</span> movie.sample(n, replace <span class="op">=</span> <span class="va">False</span>)</span>
 <span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a>    poll_result.append(np.mean(random_sample[<span class="st">"barbie"</span>]))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<div id="626000bf" class="cell" data-execution_count="12">
+<div id="75d9d50d" class="cell" data-execution_count="12">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>fig, ax <span class="op">=</span> plt.subplots()</span>
@@ -743,15 +743,15 @@ <h4 data-number="9.3.3.4" class="anchored" data-anchor-id="quantifying-chance-er
 </div>
 </div>
 <p>What fraction of these simulated samples would have predicted Barbie?</p>
-<div id="88a761ba" class="cell" data-execution_count="13">
+<div id="c2067c15" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>poll_result <span class="op">=</span> pd.Series(poll_result)</span>
 <span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>np.<span class="bu">sum</span>(poll_result <span class="op">&gt;</span> <span class="fl">0.5</span>)<span class="op">/</span><span class="dv">1000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="13">
-<pre><code>np.float64(0.956)</code></pre>
+<pre><code>np.float64(0.961)</code></pre>
 </div>
 </div>
 <p>You can see the curve looks roughly Gaussian/normal. Using KDE:</p>
-<div id="9af06f3c" class="cell" data-execution_count="14">
+<div id="26e328f6" class="cell" data-execution_count="14">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a>sns.histplot(poll_result, stat<span class="op">=</span><span class="st">'density'</span>, kde<span class="op">=</span><span class="va">True</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
diff --git a/docs/sampling/sampling_files/figure-html/cell-13-output-2.png b/docs/sampling/sampling_files/figure-html/cell-13-output-2.png
index 565ec86e..f53a6c8a 100644
Binary files a/docs/sampling/sampling_files/figure-html/cell-13-output-2.png and b/docs/sampling/sampling_files/figure-html/cell-13-output-2.png differ
diff --git a/docs/sampling/sampling_files/figure-html/cell-15-output-2.png b/docs/sampling/sampling_files/figure-html/cell-15-output-2.png
index d107eb0b..f298bbf1 100644
Binary files a/docs/sampling/sampling_files/figure-html/cell-15-output-2.png and b/docs/sampling/sampling_files/figure-html/cell-15-output-2.png differ
diff --git a/docs/search.json b/docs/search.json
index a6bb8583..21e8ac27 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -144,7 +144,7 @@
     "href": "pandas_2/pandas_2.html#useful-utility-functions",
     "title": "3  Pandas II",
     "section": "3.3 Useful Utility Functions",
-    "text": "3.3 Useful Utility Functions\npandas contains an extensive library of functions that can help shorten the process of setting and getting information from its data structures. In the following section, we will give overviews of each of the main utility functions that will help us in Data 100.\nDiscussing all functionality offered by pandas could take an entire semester! We will walk you through the most commonly-used functions and encourage you to explore and experiment on your own.\n\nNumPy and built-in function support\n.shape\n.size\n.describe()\n.sample()\n.value_counts()\n.unique()\n.sort_values()\n\nThe pandas documentation will be a valuable resource in Data 100 and beyond.\n\n3.3.1 NumPy\npandas is designed to work well with NumPy, the framework for array computations you encountered in Data 8. Just about any NumPy function can be applied to pandas DataFrames and Series.\n\n# Pull out the number of babies named Yash each year\nyash_count = babynames[babynames[\"Name\"] == \"Yash\"][\"Count\"]\nyash_count.head()\n\n331824     8\n334114     9\n336390    11\n338773    12\n341387    10\nName: Count, dtype: int64\n\n\n\n# Average number of babies named Yash each year\nnp.mean(yash_count)\n\nnp.float64(17.142857142857142)\n\n\n\n# Max number of babies named Yash born in any one year\nnp.max(yash_count)\n\nnp.int64(29)\n\n\n\n\n3.3.2 .shape and .size\n.shape and .size are attributes of Series and DataFrames that measure the “amount” of data stored in the structure. Calling .shape returns a tuple containing the number of rows and columns present in the DataFrame or Series. .size is used to find the total number of elements in a structure, equivalent to the number of rows times the number of columns.\nMany functions strictly require the dimensions of the arguments along certain axes to match. 
Calling these dimension-finding functions is much faster than counting all of the items by hand.\n\n# Return the shape of the DataFrame, in the format (num_rows, num_columns)\nbabynames.shape\n\n(407428, 5)\n\n\n\n# Return the size of the DataFrame, equal to num_rows * num_columns\nbabynames.size\n\n2037140\n\n\n\n\n3.3.3 .describe()\nIf many statistics are required from a DataFrame (minimum value, maximum value, mean value, etc.), then .describe() (documentation) can be used to compute all of them at once.\n\nbabynames.describe()\n\n\n\n\n\n\n\n\nYear\nCount\n\n\n\n\ncount\n407428.000000\n407428.000000\n\n\nmean\n1985.733609\n79.543456\n\n\nstd\n27.007660\n293.698654\n\n\nmin\n1910.000000\n5.000000\n\n\n25%\n1969.000000\n7.000000\n\n\n50%\n1992.000000\n13.000000\n\n\n75%\n2008.000000\n38.000000\n\n\nmax\n2022.000000\n8260.000000\n\n\n\n\n\n\n\nA different set of statistics will be reported if .describe() is called on a Series.\n\nbabynames[\"Sex\"].describe()\n\ncount     407428\nunique         2\ntop            F\nfreq      239537\nName: Sex, dtype: object\n\n\n\n\n3.3.4 .sample()\nAs we will see later in the semester, random processes are at the heart of many data science techniques (for example, train-test splits, bootstrapping, and cross-validation). .sample() (documentation) lets us quickly select random entries (a row if called from a DataFrame, or a value if called from a Series).\nBy default, .sample() selects entries without replacement. 
Pass in the argument replace=True to sample with replacement.\n\n# Sample a single row\nbabynames.sample()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\n\n\n\n\n298056\nCA\nM\n1980\nMaxwell\n34\n\n\n\n\n\n\n\nNaturally, this can be chained with other methods and operators (iloc, etc.).\n\n# Sample 5 random rows, and select all columns after column 2\nbabynames.sample(5).iloc[:, 2:]\n\n\n\n\n\n\n\n\nYear\nName\nCount\n\n\n\n\n356988\n2005\nTorin\n10\n\n\n353698\n2004\nJaren\n18\n\n\n37458\n1957\nCandace\n106\n\n\n161997\n2003\nKimberlee\n17\n\n\n80722\n1979\nNancy\n641\n\n\n\n\n\n\n\n\n# Randomly sample 4 names from the year 2000, with replacement, and select all columns after column 2\nbabynames[babynames[\"Year\"] == 2000].sample(4, replace = True).iloc[:, 2:]\n\n\n\n\n\n\n\n\nYear\nName\nCount\n\n\n\n\n343065\n2000\nKoby\n35\n\n\n149148\n2000\nMorgan\n499\n\n\n151307\n2000\nJanna\n9\n\n\n150077\n2000\nAlycia\n28\n\n\n\n\n\n\n\n\n\n3.3.5 .value_counts()\nThe Series.value_counts() (documentation) method counts the number of occurrence of each unique value in a Series. In other words, it counts the number of times each unique value appears. This is often useful for determining the most or least common entries in a Series.\nIn the example below, we can determine the name with the most years in which at least one person has taken that name by counting the number of times each name appears in the \"Name\" column of babynames. Note that the return value is also a Series.\n\nbabynames[\"Name\"].value_counts().head()\n\nName\nJean         223\nFrancis      221\nGuadalupe    218\nJessie       217\nMarion       214\nName: count, dtype: int64\n\n\n\n\n3.3.6 .unique()\nIf we have a Series with many repeated values, then .unique() (documentation) can be used to identify only the unique values. 
Here we return an array of all the names in babynames.\n\nbabynames[\"Name\"].unique()\n\narray(['Mary', 'Helen', 'Dorothy', ..., 'Zae', 'Zai', 'Zayvier'],\n      dtype=object)\n\n\n\n\n3.3.7 .sort_values()\nOrdering a DataFrame can be useful for isolating extreme values. For example, the first 5 entries of a row sorted in descending order (that is, from highest to lowest) are the largest 5 values. .sort_values (documentation) allows us to order a DataFrame or Series by a specified column. We can choose to either receive the rows in ascending order (default) or descending order.\n\n# Sort the \"Count\" column from highest to lowest\nbabynames.sort_values(by=\"Count\", ascending=False).head()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\n\n\n\n\n268041\nCA\nM\n1957\nMichael\n8260\n\n\n267017\nCA\nM\n1956\nMichael\n8258\n\n\n317387\nCA\nM\n1990\nMichael\n8246\n\n\n281850\nCA\nM\n1969\nMichael\n8245\n\n\n283146\nCA\nM\n1970\nMichael\n8196\n\n\n\n\n\n\n\nUnlike when calling .value_counts() on a DataFrame, we do not need to explicitly specify the column used for sorting when calling .value_counts() on a Series. We can still specify the ordering paradigm – that is, whether values are sorted in ascending or descending order.\n\n# Sort the \"Name\" Series alphabetically\nbabynames[\"Name\"].sort_values(ascending=True).head()\n\n366001      Aadan\n384005      Aadan\n369120      Aadan\n398211    Aadarsh\n370306      Aaden\nName: Name, dtype: object",
+    "text": "3.3 Useful Utility Functions\npandas contains an extensive library of functions that can help shorten the process of setting and getting information from its data structures. In the following section, we will give overviews of each of the main utility functions that will help us in Data 100.\nDiscussing all functionality offered by pandas could take an entire semester! We will walk you through the most commonly-used functions and encourage you to explore and experiment on your own.\n\nNumPy and built-in function support\n.shape\n.size\n.describe()\n.sample()\n.value_counts()\n.unique()\n.sort_values()\n\nThe pandas documentation will be a valuable resource in Data 100 and beyond.\n\n3.3.1 NumPy\npandas is designed to work well with NumPy, the framework for array computations you encountered in Data 8. Just about any NumPy function can be applied to pandas DataFrames and Series.\n\n# Pull out the number of babies named Yash each year\nyash_count = babynames[babynames[\"Name\"] == \"Yash\"][\"Count\"]\nyash_count.head()\n\n331824     8\n334114     9\n336390    11\n338773    12\n341387    10\nName: Count, dtype: int64\n\n\n\n# Average number of babies named Yash each year\nnp.mean(yash_count)\n\nnp.float64(17.142857142857142)\n\n\n\n# Max number of babies named Yash born in any one year\nnp.max(yash_count)\n\nnp.int64(29)\n\n\n\n\n3.3.2 .shape and .size\n.shape and .size are attributes of Series and DataFrames that measure the “amount” of data stored in the structure. Calling .shape returns a tuple containing the number of rows and columns present in the DataFrame or Series. .size is used to find the total number of elements in a structure, equivalent to the number of rows times the number of columns.\nMany functions strictly require the dimensions of the arguments along certain axes to match. 
Calling these dimension-finding functions is much faster than counting all of the items by hand.\n\n# Return the shape of the DataFrame, in the format (num_rows, num_columns)\nbabynames.shape\n\n(407428, 5)\n\n\n\n# Return the size of the DataFrame, equal to num_rows * num_columns\nbabynames.size\n\n2037140\n\n\n\n\n3.3.3 .describe()\nIf many statistics are required from a DataFrame (minimum value, maximum value, mean value, etc.), then .describe() (documentation) can be used to compute all of them at once.\n\nbabynames.describe()\n\n\n\n\n\n\n\n\nYear\nCount\n\n\n\n\ncount\n407428.000000\n407428.000000\n\n\nmean\n1985.733609\n79.543456\n\n\nstd\n27.007660\n293.698654\n\n\nmin\n1910.000000\n5.000000\n\n\n25%\n1969.000000\n7.000000\n\n\n50%\n1992.000000\n13.000000\n\n\n75%\n2008.000000\n38.000000\n\n\nmax\n2022.000000\n8260.000000\n\n\n\n\n\n\n\nA different set of statistics will be reported if .describe() is called on a Series.\n\nbabynames[\"Sex\"].describe()\n\ncount     407428\nunique         2\ntop            F\nfreq      239537\nName: Sex, dtype: object\n\n\n\n\n3.3.4 .sample()\nAs we will see later in the semester, random processes are at the heart of many data science techniques (for example, train-test splits, bootstrapping, and cross-validation). .sample() (documentation) lets us quickly select random entries (a row if called from a DataFrame, or a value if called from a Series).\nBy default, .sample() selects entries without replacement. 
Pass in the argument replace=True to sample with replacement.\n\n# Sample a single row\nbabynames.sample()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\n\n\n\n\n199745\nCA\nF\n2012\nDream\n10\n\n\n\n\n\n\n\nNaturally, this can be chained with other methods and operators (iloc, etc.).\n\n# Sample 5 random rows, and select all columns after column 2\nbabynames.sample(5).iloc[:, 2:]\n\n\n\n\n\n\n\n\nYear\nName\nCount\n\n\n\n\n53849\n1966\nStefani\n20\n\n\n176617\n2006\nSabah\n5\n\n\n334208\n1996\nLevon\n8\n\n\n22777\n1946\nLynne\n197\n\n\n88619\n1982\nMartha\n316\n\n\n\n\n\n\n\n\n# Randomly sample 4 names from the year 2000, with replacement, and select all columns after column 2\nbabynames[babynames[\"Year\"] == 2000].sample(4, replace = True).iloc[:, 2:]\n\n\n\n\n\n\n\n\nYear\nName\nCount\n\n\n\n\n343989\n2000\nAdithya\n8\n\n\n343085\n2000\nEmerson\n33\n\n\n342772\n2000\nDerrick\n112\n\n\n343379\n2000\nJohann\n18\n\n\n\n\n\n\n\n\n\n3.3.5 .value_counts()\nThe Series.value_counts() (documentation) method counts the number of occurrence of each unique value in a Series. In other words, it counts the number of times each unique value appears. This is often useful for determining the most or least common entries in a Series.\nIn the example below, we can determine the name with the most years in which at least one person has taken that name by counting the number of times each name appears in the \"Name\" column of babynames. Note that the return value is also a Series.\n\nbabynames[\"Name\"].value_counts().head()\n\nName\nJean         223\nFrancis      221\nGuadalupe    218\nJessie       217\nMarion       214\nName: count, dtype: int64\n\n\n\n\n3.3.6 .unique()\nIf we have a Series with many repeated values, then .unique() (documentation) can be used to identify only the unique values. 
Here we return an array of all the names in babynames.\n\nbabynames[\"Name\"].unique()\n\narray(['Mary', 'Helen', 'Dorothy', ..., 'Zae', 'Zai', 'Zayvier'],\n      dtype=object)\n\n\n\n\n3.3.7 .sort_values()\nOrdering a DataFrame can be useful for isolating extreme values. For example, the first 5 entries of a row sorted in descending order (that is, from highest to lowest) are the largest 5 values. .sort_values (documentation) allows us to order a DataFrame or Series by a specified column. We can choose to either receive the rows in ascending order (default) or descending order.\n\n# Sort the \"Count\" column from highest to lowest\nbabynames.sort_values(by=\"Count\", ascending=False).head()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\n\n\n\n\n268041\nCA\nM\n1957\nMichael\n8260\n\n\n267017\nCA\nM\n1956\nMichael\n8258\n\n\n317387\nCA\nM\n1990\nMichael\n8246\n\n\n281850\nCA\nM\n1969\nMichael\n8245\n\n\n283146\nCA\nM\n1970\nMichael\n8196\n\n\n\n\n\n\n\nUnlike when calling .value_counts() on a DataFrame, we do not need to explicitly specify the column used for sorting when calling .value_counts() on a Series. We can still specify the ordering paradigm – that is, whether values are sorted in ascending or descending order.\n\n# Sort the \"Name\" Series alphabetically\nbabynames[\"Name\"].sort_values(ascending=True).head()\n\n366001      Aadan\n384005      Aadan\n369120      Aadan\n398211    Aadarsh\n370306      Aaden\nName: Name, dtype: object",
     "crumbs": [
       "<span class='chapter-number'>3</span>  <span class='chapter-title'>Pandas II</span>"
     ]
@@ -184,7 +184,7 @@
     "href": "pandas_3/pandas_3.html#aggregating-data-with-.groupby",
     "title": "4  Pandas III",
     "section": "4.2 Aggregating Data with .groupby",
-    "text": "4.2 Aggregating Data with .groupby\nUp until this point, we have been working with individual rows of DataFrames. As data scientists, we often wish to investigate trends across a larger subset of our data. For example, we may want to compute some summary statistic (the mean, median, sum, etc.) for a group of rows in our DataFrame. To do this, we’ll use pandas GroupBy objects. Our goal is to group together rows that fall under the same category and perform an operation that aggregates across all rows in the category.\nLet’s say we wanted to aggregate all rows in babynames for a given year.\n\nbabynames.groupby(\"Year\")\n\n&lt;pandas.core.groupby.generic.DataFrameGroupBy object at 0x1257c93d0&gt;\n\n\nWhat does this strange output mean? Calling .groupby (documentation) has generated a GroupBy object. You can imagine this as a set of “mini” sub-DataFrames, where each subframe contains all of the rows from babynames that correspond to a particular year.\nThe diagram below shows a simplified view of babynames to help illustrate this idea.\n\n\n\nWe can’t work with a GroupBy object directly – that is why you saw that strange output earlier rather than a standard view of a DataFrame. To actually manipulate values within these “mini” DataFrames, we’ll need to call an aggregation method. This is a method that tells pandas how to aggregate the values within the GroupBy object. Once the aggregation is applied, pandas will return a normal (now grouped) DataFrame.\nThe first aggregation method we’ll consider is .agg. The .agg method takes in a function as its argument; this function is then applied to each column of a “mini” grouped DataFrame. We end up with a new DataFrame with one aggregated row per subframe. 
Let’s see this in action by finding the sum of all counts for each year in babynames – this is equivalent to finding the number of babies born in each year.\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"sum\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nWe can relate this back to the diagram we used above. Remember that the diagram uses a simplified version of babynames, which is why we see smaller values for the summed counts.\n\n\n\nPerforming an aggregation\n\n\nCalling .agg has condensed each subframe back into a single row. This gives us our final output: a DataFrame that is now indexed by \"Year\", with a single row for each unique year in the original babynames DataFrame.\nThere are many different aggregation functions we can use, all of which are useful in different applications.\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"min\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n5\n\n\n1911\n5\n\n\n1912\n5\n\n\n1913\n5\n\n\n1914\n5\n\n\n\n\n\n\n\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"max\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n295\n\n\n1911\n390\n\n\n1912\n534\n\n\n1913\n614\n\n\n1914\n773\n\n\n\n\n\n\n\n\n# Same result, but now we explicitly tell pandas to only consider the \"Count\" column when summing\nbabynames.groupby(\"Year\")[[\"Count\"]].agg(\"sum\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nThere are many different aggregations that can be applied to the grouped data. 
The primary requirement is that an aggregation function must:\n\nTake in a Series of data (a single column of the grouped subframe).\nReturn a single value that aggregates this Series.\n\n\n4.2.1 Aggregation Functions\nBecause of this fairly broad requirement, pandas offers many ways of computing an aggregation.\nIn-built Python operations – such as sum, max, and min – are automatically recognized by pandas.\n\n# What is the minimum count for each name in any year?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"min\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n5\n\n\nAadarsh\n6\n\n\nAaden\n10\n\n\nAadhav\n6\n\n\nAadhini\n6\n\n\n\n\n\n\n\n\n# What is the largest single-year count of each name?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"max\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n7\n\n\nAadarsh\n6\n\n\nAaden\n158\n\n\nAadhav\n8\n\n\nAadhini\n6\n\n\n\n\n\n\n\nAs mentioned previously, functions from the NumPy library, such as np.mean, np.max, np.min, and np.sum, are also fair game in pandas.\n\n# What is the average count for each name across all years?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"mean\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n6.000000\n\n\nAadarsh\n6.000000\n\n\nAaden\n46.214286\n\n\nAadhav\n6.750000\n\n\nAadhini\n6.000000\n\n\n\n\n\n\n\npandas also offers a number of in-built functions. Functions that are native to pandas can be referenced using their string name within a call to .agg. Some examples include:\n\n.agg(\"sum\")\n.agg(\"max\")\n.agg(\"min\")\n.agg(\"mean\")\n.agg(\"first\")\n.agg(\"last\")\n\nThe latter two entries in this list – \"first\" and \"last\" – are unique to pandas. They return the first or last entry in a subframe column. Why might this be useful? Consider a case where multiple columns in a group share identical information. 
To represent this information in the grouped output, we can simply grab the first or last entry, which we know will be identical to all other entries.\nLet’s illustrate this with an example. Say we add a new column to babynames that contains the first letter of each name.\n\n# Imagine we had an additional column, \"First Letter\". We'll explain this code next week\nbabynames[\"First Letter\"] = babynames[\"Name\"].str[0]\n\n# We construct a simplified DataFrame containing just a subset of columns\nbabynames_new = babynames[[\"Name\", \"First Letter\", \"Year\"]]\nbabynames_new.head()\n\n\n\n\n\n\n\n\nName\nFirst Letter\nYear\n\n\n\n\n115957\nDeandrea\nD\n1990\n\n\n101976\nDeandrea\nD\n1986\n\n\n131029\nLeandrea\nL\n1994\n\n\n108731\nDeandrea\nD\n1988\n\n\n308131\nDeandrea\nD\n1985\n\n\n\n\n\n\n\nIf we form groups for each name in the dataset, \"First Letter\" will be the same for all members of the group. This means that if we simply select the first entry for \"First Letter\" in the group, we’ll represent all data in that group.\nWe can use a dictionary to apply different aggregation functions to each column during grouping.\n\n\n\nAggregating using “first”\n\n\n\nbabynames_new.groupby(\"Name\").agg({\"First Letter\":\"first\", \"Year\":\"max\"}).head()\n\n\n\n\n\n\n\n\nFirst Letter\nYear\n\n\nName\n\n\n\n\n\n\nAadan\nA\n2014\n\n\nAadarsh\nA\n2019\n\n\nAaden\nA\n2020\n\n\nAadhav\nA\n2019\n\n\nAadhini\nA\n2022\n\n\n\n\n\n\n\n\n\n4.2.2 Plotting Birth Counts\nLet’s use .agg to find the total number of babies born in each year. Recall that using .agg with .groupby() follows the format: df.groupby(column_name).agg(aggregation_function). 
The line of code below gives us the total number of babies born in each year.\n\n\nCode\nbabynames.groupby(\"Year\")[[\"Count\"]].agg(sum).head(5)\n# Alternative 1\n# babynames.groupby(\"Year\")[[\"Count\"]].sum()\n# Alternative 2\n# babynames.groupby(\"Year\").sum(numeric_only=True)\n\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/390646742.py:1: FutureWarning:\n\nThe provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n\n\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nHere’s an illustration of the process:\n\nPlotting the Dataframe we obtain tells an interesting story.\n\n\nCode\nimport plotly.express as px\npuzzle2 = babynames.groupby(\"Year\")[[\"Count\"]].agg(\"sum\")\npx.line(puzzle2, y = \"Count\")\n\n\n                                                \n\n\nA word of warning: we made an enormous assumption when we decided to use this dataset to estimate birth rate. According to this article from the Legistlative Analyst Office, the true number of babies born in California in 2020 was 421,275. However, our plot shows 362,882 babies —— what happened?\n\n\n4.2.3 Summary of the .groupby() Function\nA groupby operation involves some combination of splitting a DataFrame into grouped subframes, applying a function, and combining the results.\nFor some arbitrary DataFrame df below, the code df.groupby(\"year\").agg(sum) does the following:\n\nSplits the DataFrame into sub-DataFrames with rows belonging to the same year.\nApplies the sum function to each column of each sub-DataFrame.\nCombines the results of sum into a single DataFrame, indexed by year.\n\n\n\n\n4.2.4 Revisiting the .agg() Function\n.agg() can take in any function that aggregates several values into one summary value. 
Some commonly-used aggregation functions can even be called directly, without explicit use of .agg(). For example, we can call .mean() on .groupby():\nbabynames.groupby(\"Year\").mean().head()\nWe can now put this all into practice. Say we want to find the baby name with sex “F” that has fallen in popularity the most in California. To calculate this, we can first create a metric: “Ratio to Peak” (RTP). The RTP is the ratio of babies born with a given name in 2022 to the maximum number of babies born with the name in any year.\nLet’s start with calculating this for one baby, “Jennifer”.\n\n# We filter by babies with sex \"F\" and sort by \"Year\"\nf_babynames = babynames[babynames[\"Sex\"] == \"F\"]\nf_babynames = f_babynames.sort_values([\"Year\"])\n\n# Determine how many Jennifers were born in CA per year\njenn_counts_series = f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"]\n\n# Determine the max number of Jennifers born in a year and the number born in 2022 \n# to calculate RTP\nmax_jenn = max(f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"])\ncurr_jenn = f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"].iloc[-1]\nrtp = curr_jenn / max_jenn\nrtp\n\nnp.float64(0.018796372629843364)\n\n\nBy creating a function to calculate RTP and applying it to our DataFrame by using .groupby(), we can easily compute the RTP for all names at once!\n\ndef ratio_to_peak(series):\n    return series.iloc[-1] / max(series)\n\n#Using .groupby() to apply the function\nrtp_table = f_babynames.groupby(\"Name\")[[\"Year\", \"Count\"]].agg(ratio_to_peak)\nrtp_table.head()\n\n\n\n\n\n\n\n\nYear\nCount\n\n\nName\n\n\n\n\n\n\nAadhini\n1.0\n1.000000\n\n\nAadhira\n1.0\n0.500000\n\n\nAadhya\n1.0\n0.660000\n\n\nAadya\n1.0\n0.586207\n\n\nAahana\n1.0\n0.269231\n\n\n\n\n\n\n\nIn the rows shown above, we can see that every row shown has a Year value of 1.0.\nThis is the “pandas-ification” of logic you saw in Data 8. 
Much of the logic you’ve learned in Data 8 will serve you well in Data 100.\n\n\n4.2.5 Nuisance Columns\nNote that you must be careful with which columns you apply the .agg() function to. If we were to apply our function to the table as a whole by doing f_babynames.groupby(\"Name\").agg(ratio_to_peak), executing our .agg() call would result in a TypeError.\n\nWe can avoid this issue (and prevent unintentional loss of data) by explicitly selecting column(s) we want to apply our aggregation function to BEFORE calling .agg(),\n\n\n4.2.6 Renaming Columns After Grouping\nBy default, .groupby will not rename any aggregated columns. As we can see in the table above, the aggregated column is still named Count even though it now represents the RTP. For better readability, we can rename Count to Count RTP\n\nrtp_table = rtp_table.rename(columns = {\"Count\": \"Count RTP\"})\nrtp_table\n\n\n\n\n\n\n\n\nYear\nCount RTP\n\n\nName\n\n\n\n\n\n\nAadhini\n1.0\n1.000000\n\n\nAadhira\n1.0\n0.500000\n\n\nAadhya\n1.0\n0.660000\n\n\nAadya\n1.0\n0.586207\n\n\nAahana\n1.0\n0.269231\n\n\n...\n...\n...\n\n\nZyanya\n1.0\n0.466667\n\n\nZyla\n1.0\n1.000000\n\n\nZylah\n1.0\n1.000000\n\n\nZyra\n1.0\n1.000000\n\n\nZyrah\n1.0\n0.833333\n\n\n\n\n13782 rows × 2 columns\n\n\n\n\n\n4.2.7 Some Data Science Payoff\nBy sorting rtp_table, we can see the names whose popularity has decreased the most.\n\nrtp_table = rtp_table.rename(columns = {\"Count\": \"Count RTP\"})\nrtp_table.sort_values(\"Count RTP\").head()\n\n\n\n\n\n\n\n\nYear\nCount RTP\n\n\nName\n\n\n\n\n\n\nDebra\n1.0\n0.001260\n\n\nDebbie\n1.0\n0.002815\n\n\nCarol\n1.0\n0.003180\n\n\nTammy\n1.0\n0.003249\n\n\nSusan\n1.0\n0.003305\n\n\n\n\n\n\n\nTo visualize the above DataFrame, let’s look at the line plot below:\n\n\nCode\nimport plotly.express as px\npx.line(f_babynames[f_babynames[\"Name\"] == \"Debra\"], x = \"Year\", y = \"Count\")\n\n\n                                                \n\n\nWe can get the list of the top 10 names and then 
plot popularity with the following code:\n\ntop10 = rtp_table.sort_values(\"Count RTP\").head(10).index\npx.line(\n    f_babynames[f_babynames[\"Name\"].isin(top10)], \n    x = \"Year\", \n    y = \"Count\", \n    color = \"Name\"\n)\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/plotly/express/_core.py:1980: FutureWarning:\n\nWhen grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.\n\n\n\n                                                \n\n\nAs a quick exercise, consider what code would compute the total number of babies with each name.\n\n\nCode\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"sum\").head()\n# alternative solution: \n# babynames.groupby(\"Name\")[[\"Count\"]].sum()\n\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n18\n\n\nAadarsh\n6\n\n\nAaden\n647\n\n\nAadhav\n27\n\n\nAadhini\n6",
+    "text": "4.2 Aggregating Data with .groupby\nUp until this point, we have been working with individual rows of DataFrames. As data scientists, we often wish to investigate trends across a larger subset of our data. For example, we may want to compute some summary statistic (the mean, median, sum, etc.) for a group of rows in our DataFrame. To do this, we’ll use pandas GroupBy objects. Our goal is to group together rows that fall under the same category and perform an operation that aggregates across all rows in the category.\nLet’s say we wanted to aggregate all rows in babynames for a given year.\n\nbabynames.groupby(\"Year\")\n\n&lt;pandas.core.groupby.generic.DataFrameGroupBy object at 0x10f8d1cd0&gt;\n\n\nWhat does this strange output mean? Calling .groupby (documentation) has generated a GroupBy object. You can imagine this as a set of “mini” sub-DataFrames, where each subframe contains all of the rows from babynames that correspond to a particular year.\nThe diagram below shows a simplified view of babynames to help illustrate this idea.\n\n\n\nWe can’t work with a GroupBy object directly – that is why you saw that strange output earlier rather than a standard view of a DataFrame. To actually manipulate values within these “mini” DataFrames, we’ll need to call an aggregation method. This is a method that tells pandas how to aggregate the values within the GroupBy object. Once the aggregation is applied, pandas will return a normal (now grouped) DataFrame.\nThe first aggregation method we’ll consider is .agg. The .agg method takes in a function as its argument; this function is then applied to each column of a “mini” grouped DataFrame. We end up with a new DataFrame with one aggregated row per subframe. 
Let’s see this in action by finding the sum of all counts for each year in babynames – this is equivalent to finding the number of babies born in each year.\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"sum\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nWe can relate this back to the diagram we used above. Remember that the diagram uses a simplified version of babynames, which is why we see smaller values for the summed counts.\n\n\n\nPerforming an aggregation\n\n\nCalling .agg has condensed each subframe back into a single row. This gives us our final output: a DataFrame that is now indexed by \"Year\", with a single row for each unique year in the original babynames DataFrame.\nThere are many different aggregation functions we can use, all of which are useful in different applications.\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"min\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n5\n\n\n1911\n5\n\n\n1912\n5\n\n\n1913\n5\n\n\n1914\n5\n\n\n\n\n\n\n\n\nbabynames[[\"Year\", \"Count\"]].groupby(\"Year\").agg(\"max\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n295\n\n\n1911\n390\n\n\n1912\n534\n\n\n1913\n614\n\n\n1914\n773\n\n\n\n\n\n\n\n\n# Same result, but now we explicitly tell pandas to only consider the \"Count\" column when summing\nbabynames.groupby(\"Year\")[[\"Count\"]].agg(\"sum\").head(5)\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nThere are many different aggregations that can be applied to the grouped data. 
The primary requirement is that an aggregation function must:\n\nTake in a Series of data (a single column of the grouped subframe).\nReturn a single value that aggregates this Series.\n\n\n4.2.1 Aggregation Functions\nBecause of this fairly broad requirement, pandas offers many ways of computing an aggregation.\nIn-built Python operations – such as sum, max, and min – are automatically recognized by pandas.\n\n# What is the minimum count for each name in any year?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"min\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n5\n\n\nAadarsh\n6\n\n\nAaden\n10\n\n\nAadhav\n6\n\n\nAadhini\n6\n\n\n\n\n\n\n\n\n# What is the largest single-year count of each name?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"max\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n7\n\n\nAadarsh\n6\n\n\nAaden\n158\n\n\nAadhav\n8\n\n\nAadhini\n6\n\n\n\n\n\n\n\nAs mentioned previously, functions from the NumPy library, such as np.mean, np.max, np.min, and np.sum, are also fair game in pandas.\n\n# What is the average count for each name across all years?\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"mean\").head()\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n6.000000\n\n\nAadarsh\n6.000000\n\n\nAaden\n46.214286\n\n\nAadhav\n6.750000\n\n\nAadhini\n6.000000\n\n\n\n\n\n\n\npandas also offers a number of in-built functions. Functions that are native to pandas can be referenced using their string name within a call to .agg. Some examples include:\n\n.agg(\"sum\")\n.agg(\"max\")\n.agg(\"min\")\n.agg(\"mean\")\n.agg(\"first\")\n.agg(\"last\")\n\nThe latter two entries in this list – \"first\" and \"last\" – are unique to pandas. They return the first or last entry in a subframe column. Why might this be useful? Consider a case where multiple columns in a group share identical information. 
To represent this information in the grouped output, we can simply grab the first or last entry, which we know will be identical to all other entries.\nLet’s illustrate this with an example. Say we add a new column to babynames that contains the first letter of each name.\n\n# Imagine we had an additional column, \"First Letter\". We'll explain this code next week\nbabynames[\"First Letter\"] = babynames[\"Name\"].str[0]\n\n# We construct a simplified DataFrame containing just a subset of columns\nbabynames_new = babynames[[\"Name\", \"First Letter\", \"Year\"]]\nbabynames_new.head()\n\n\n\n\n\n\n\n\nName\nFirst Letter\nYear\n\n\n\n\n115957\nDeandrea\nD\n1990\n\n\n101976\nDeandrea\nD\n1986\n\n\n131029\nLeandrea\nL\n1994\n\n\n108731\nDeandrea\nD\n1988\n\n\n308131\nDeandrea\nD\n1985\n\n\n\n\n\n\n\nIf we form groups for each name in the dataset, \"First Letter\" will be the same for all members of the group. This means that if we simply select the first entry for \"First Letter\" in the group, we’ll represent all data in that group.\nWe can use a dictionary to apply different aggregation functions to each column during grouping.\n\n\n\nAggregating using “first”\n\n\n\nbabynames_new.groupby(\"Name\").agg({\"First Letter\":\"first\", \"Year\":\"max\"}).head()\n\n\n\n\n\n\n\n\nFirst Letter\nYear\n\n\nName\n\n\n\n\n\n\nAadan\nA\n2014\n\n\nAadarsh\nA\n2019\n\n\nAaden\nA\n2020\n\n\nAadhav\nA\n2019\n\n\nAadhini\nA\n2022\n\n\n\n\n\n\n\n\n\n4.2.2 Plotting Birth Counts\nLet’s use .agg to find the total number of babies born in each year. Recall that using .agg with .groupby() follows the format: df.groupby(column_name).agg(aggregation_function). 
The line of code below gives us the total number of babies born in each year.\n\n\nCode\nbabynames.groupby(\"Year\")[[\"Count\"]].agg(sum).head(5)\n# Alternative 1\n# babynames.groupby(\"Year\")[[\"Count\"]].sum()\n# Alternative 2\n# babynames.groupby(\"Year\").sum(numeric_only=True)\n\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/390646742.py:1: FutureWarning:\n\nThe provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n\n\n\n\n\n\n\n\n\n\nCount\n\n\nYear\n\n\n\n\n\n1910\n9163\n\n\n1911\n9983\n\n\n1912\n17946\n\n\n1913\n22094\n\n\n1914\n26926\n\n\n\n\n\n\n\nHere’s an illustration of the process:\n\nPlotting the Dataframe we obtain tells an interesting story.\n\n\nCode\nimport plotly.express as px\npuzzle2 = babynames.groupby(\"Year\")[[\"Count\"]].agg(\"sum\")\npx.line(puzzle2, y = \"Count\")\n\n\n                                                \n\n\nA word of warning: we made an enormous assumption when we decided to use this dataset to estimate birth rate. According to this article from the Legistlative Analyst Office, the true number of babies born in California in 2020 was 421,275. However, our plot shows 362,882 babies —— what happened?\n\n\n4.2.3 Summary of the .groupby() Function\nA groupby operation involves some combination of splitting a DataFrame into grouped subframes, applying a function, and combining the results.\nFor some arbitrary DataFrame df below, the code df.groupby(\"year\").agg(sum) does the following:\n\nSplits the DataFrame into sub-DataFrames with rows belonging to the same year.\nApplies the sum function to each column of each sub-DataFrame.\nCombines the results of sum into a single DataFrame, indexed by year.\n\n\n\n\n4.2.4 Revisiting the .agg() Function\n.agg() can take in any function that aggregates several values into one summary value. 
Some commonly-used aggregation functions can even be called directly, without explicit use of .agg(). For example, we can call .mean() on .groupby():\nbabynames.groupby(\"Year\").mean().head()\nWe can now put this all into practice. Say we want to find the baby name with sex “F” that has fallen in popularity the most in California. To calculate this, we can first create a metric: “Ratio to Peak” (RTP). The RTP is the ratio of babies born with a given name in 2022 to the maximum number of babies born with the name in any year.\nLet’s start with calculating this for one baby, “Jennifer”.\n\n# We filter by babies with sex \"F\" and sort by \"Year\"\nf_babynames = babynames[babynames[\"Sex\"] == \"F\"]\nf_babynames = f_babynames.sort_values([\"Year\"])\n\n# Determine how many Jennifers were born in CA per year\njenn_counts_series = f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"]\n\n# Determine the max number of Jennifers born in a year and the number born in 2022 \n# to calculate RTP\nmax_jenn = max(f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"])\ncurr_jenn = f_babynames[f_babynames[\"Name\"] == \"Jennifer\"][\"Count\"].iloc[-1]\nrtp = curr_jenn / max_jenn\nrtp\n\nnp.float64(0.018796372629843364)\n\n\nBy creating a function to calculate RTP and applying it to our DataFrame by using .groupby(), we can easily compute the RTP for all names at once!\n\ndef ratio_to_peak(series):\n    return series.iloc[-1] / max(series)\n\n#Using .groupby() to apply the function\nrtp_table = f_babynames.groupby(\"Name\")[[\"Year\", \"Count\"]].agg(ratio_to_peak)\nrtp_table.head()\n\n\n\n\n\n\n\n\nYear\nCount\n\n\nName\n\n\n\n\n\n\nAadhini\n1.0\n1.000000\n\n\nAadhira\n1.0\n0.500000\n\n\nAadhya\n1.0\n0.660000\n\n\nAadya\n1.0\n0.586207\n\n\nAahana\n1.0\n0.269231\n\n\n\n\n\n\n\nIn the rows shown above, we can see that every row shown has a Year value of 1.0.\nThis is the “pandas-ification” of logic you saw in Data 8. 
Much of the logic you’ve learned in Data 8 will serve you well in Data 100.\n\n\n4.2.5 Nuisance Columns\nNote that you must be careful with which columns you apply the .agg() function to. If we were to apply our function to the table as a whole by doing f_babynames.groupby(\"Name\").agg(ratio_to_peak), executing our .agg() call would result in a TypeError.\n\nWe can avoid this issue (and prevent unintentional loss of data) by explicitly selecting column(s) we want to apply our aggregation function to BEFORE calling .agg(),\n\n\n4.2.6 Renaming Columns After Grouping\nBy default, .groupby will not rename any aggregated columns. As we can see in the table above, the aggregated column is still named Count even though it now represents the RTP. For better readability, we can rename Count to Count RTP\n\nrtp_table = rtp_table.rename(columns = {\"Count\": \"Count RTP\"})\nrtp_table\n\n\n\n\n\n\n\n\nYear\nCount RTP\n\n\nName\n\n\n\n\n\n\nAadhini\n1.0\n1.000000\n\n\nAadhira\n1.0\n0.500000\n\n\nAadhya\n1.0\n0.660000\n\n\nAadya\n1.0\n0.586207\n\n\nAahana\n1.0\n0.269231\n\n\n...\n...\n...\n\n\nZyanya\n1.0\n0.466667\n\n\nZyla\n1.0\n1.000000\n\n\nZylah\n1.0\n1.000000\n\n\nZyra\n1.0\n1.000000\n\n\nZyrah\n1.0\n0.833333\n\n\n\n\n13782 rows × 2 columns\n\n\n\n\n\n4.2.7 Some Data Science Payoff\nBy sorting rtp_table, we can see the names whose popularity has decreased the most.\n\nrtp_table = rtp_table.rename(columns = {\"Count\": \"Count RTP\"})\nrtp_table.sort_values(\"Count RTP\").head()\n\n\n\n\n\n\n\n\nYear\nCount RTP\n\n\nName\n\n\n\n\n\n\nDebra\n1.0\n0.001260\n\n\nDebbie\n1.0\n0.002815\n\n\nCarol\n1.0\n0.003180\n\n\nTammy\n1.0\n0.003249\n\n\nSusan\n1.0\n0.003305\n\n\n\n\n\n\n\nTo visualize the above DataFrame, let’s look at the line plot below:\n\n\nCode\nimport plotly.express as px\npx.line(f_babynames[f_babynames[\"Name\"] == \"Debra\"], x = \"Year\", y = \"Count\")\n\n\n                                                \n\n\nWe can get the list of the top 10 names and then 
plot popularity with the following code:\n\ntop10 = rtp_table.sort_values(\"Count RTP\").head(10).index\npx.line(\n    f_babynames[f_babynames[\"Name\"].isin(top10)], \n    x = \"Year\", \n    y = \"Count\", \n    color = \"Name\"\n)\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/plotly/express/_core.py:1980: FutureWarning:\n\nWhen grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.\n\n\n\n                                                \n\n\nAs a quick exercise, consider what code would compute the total number of babies with each name.\n\n\nCode\nbabynames.groupby(\"Name\")[[\"Count\"]].agg(\"sum\").head()\n# alternative solution: \n# babynames.groupby(\"Name\")[[\"Count\"]].sum()\n\n\n\n\n\n\n\n\n\nCount\n\n\nName\n\n\n\n\n\nAadan\n18\n\n\nAadarsh\n6\n\n\nAaden\n647\n\n\nAadhav\n27\n\n\nAadhini\n6",
     "crumbs": [
       "<span class='chapter-number'>4</span>  <span class='chapter-title'>Pandas III</span>"
     ]
@@ -194,7 +194,7 @@
     "href": "pandas_3/pandas_3.html#groupby-continued",
     "title": "4  Pandas III",
     "section": "4.3 .groupby(), Continued",
-    "text": "4.3 .groupby(), Continued\nWe’ll work with the elections DataFrame again.\n\n\nCode\nimport pandas as pd\nimport numpy as np\n\nelections = pd.read_csv(\"data/elections.csv\")\nelections.head(5)\n\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n0\n1824\nAndrew Jackson\nDemocratic-Republican\n151271\nloss\n57.210122\n\n\n1\n1824\nJohn Quincy Adams\nDemocratic-Republican\n113142\nwin\n42.789878\n\n\n2\n1828\nAndrew Jackson\nDemocratic\n642806\nwin\n56.203927\n\n\n3\n1828\nJohn Quincy Adams\nNational Republican\n500897\nloss\n43.796073\n\n\n4\n1832\nAndrew Jackson\nDemocratic\n702735\nwin\n54.574789\n\n\n\n\n\n\n\n\n4.3.1 Raw GroupBy Objects\nThe result of groupby applied to a DataFrame is a DataFrameGroupBy object, not a DataFrame.\n\ngrouped_by_year = elections.groupby(\"Year\")\ntype(grouped_by_year)\n\npandas.core.groupby.generic.DataFrameGroupBy\n\n\nThere are several ways to look into DataFrameGroupBy objects:\n\ngrouped_by_party = elections.groupby(\"Party\")\ngrouped_by_party.groups\n\n{'American': [22, 126], 'American Independent': [115, 119, 124], 'Anti-Masonic': [6], 'Anti-Monopoly': [38], 'Citizens': [127], 'Communist': [89], 'Constitution': [160, 164, 172], 'Constitutional Union': [24], 'Democratic': [2, 4, 8, 10, 13, 14, 17, 20, 28, 29, 34, 37, 39, 45, 47, 52, 55, 57, 64, 70, 74, 77, 81, 83, 86, 91, 94, 97, 100, 105, 108, 111, 114, 116, 118, 123, 129, 134, 137, 140, 144, 151, 158, 162, 168, 176, 178], 'Democratic-Republican': [0, 1], 'Dixiecrat': [103], 'Farmer–Labor': [78], 'Free Soil': [15, 18], 'Green': [149, 155, 156, 165, 170, 177, 181], 'Greenback': [35], 'Independent': [121, 130, 143, 161, 167, 174], 'Liberal Republican': [31], 'Libertarian': [125, 128, 132, 138, 139, 146, 153, 159, 163, 169, 175, 180], 'National Democratic': [50], 'National Republican': [3, 5], 'National Union': [27], 'Natural Law': [148], 'New Alliance': [136], 'Northern Democratic': [26], 'Populist': [48, 61, 141], 'Progressive': [68, 82, 
101, 107], 'Prohibition': [41, 44, 49, 51, 54, 59, 63, 67, 73, 75, 99], 'Reform': [150, 154], 'Republican': [21, 23, 30, 32, 33, 36, 40, 43, 46, 53, 56, 60, 65, 69, 72, 79, 80, 84, 87, 90, 96, 98, 104, 106, 109, 112, 113, 117, 120, 122, 131, 133, 135, 142, 145, 152, 157, 166, 171, 173, 179], 'Socialist': [58, 62, 66, 71, 76, 85, 88, 92, 95, 102], 'Southern Democratic': [25], 'States' Rights': [110], 'Taxpayers': [147], 'Union': [93], 'Union Labor': [42], 'Whig': [7, 9, 11, 12, 16, 19]}\n\n\n\ngrouped_by_party.get_group(\"Socialist\")\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n58\n1904\nEugene V. Debs\nSocialist\n402810\nloss\n2.985897\n\n\n62\n1908\nEugene V. Debs\nSocialist\n420852\nloss\n2.850866\n\n\n66\n1912\nEugene V. Debs\nSocialist\n901551\nloss\n6.004354\n\n\n71\n1916\nAllan L. Benson\nSocialist\n590524\nloss\n3.194193\n\n\n76\n1920\nEugene V. Debs\nSocialist\n913693\nloss\n3.428282\n\n\n85\n1928\nNorman Thomas\nSocialist\n267478\nloss\n0.728623\n\n\n88\n1932\nNorman Thomas\nSocialist\n884885\nloss\n2.236211\n\n\n92\n1936\nNorman Thomas\nSocialist\n187910\nloss\n0.412876\n\n\n95\n1940\nNorman Thomas\nSocialist\n116599\nloss\n0.234237\n\n\n102\n1948\nNorman Thomas\nSocialist\n139569\nloss\n0.286312\n\n\n\n\n\n\n\n\n\n4.3.2 Other GroupBy Methods\nThere are many aggregation methods we can use with .agg. 
Some useful options are:\n\n.mean: creates a new DataFrame with the mean value of each group\n.sum: creates a new DataFrame with the sum of each group\n.max and .min: creates a new DataFrame with the maximum/minimum value of each group\n.first and .last: creates a new DataFrame with the first/last row in each group\n.size: creates a new Series with the number of entries in each group\n.count: creates a new DataFrame with the number of entries, excluding missing values.\n\nLet’s illustrate some examples by creating a DataFrame called df.\n\ndf = pd.DataFrame({'letter':['A','A','B','C','C','C'], \n                   'num':[1,2,3,4,np.nan,4], \n                   'state':[np.nan, 'tx', 'fl', 'hi', np.nan, 'ak']})\ndf\n\n\n\n\n\n\n\n\nletter\nnum\nstate\n\n\n\n\n0\nA\n1.0\nNaN\n\n\n1\nA\n2.0\ntx\n\n\n2\nB\n3.0\nfl\n\n\n3\nC\n4.0\nhi\n\n\n4\nC\nNaN\nNaN\n\n\n5\nC\n4.0\nak\n\n\n\n\n\n\n\nNote the slight difference between .size() and .count(): while .size() returns a Series and counts the number of entries including the missing values, .count() returns a DataFrame and counts the number of entries in each column excluding missing values.\n\ndf.groupby(\"letter\").size()\n\nletter\nA    2\nB    1\nC    3\ndtype: int64\n\n\n\ndf.groupby(\"letter\").count()\n\n\n\n\n\n\n\n\nnum\nstate\n\n\nletter\n\n\n\n\n\n\nA\n2\n1\n\n\nB\n1\n1\n\n\nC\n2\n2\n\n\n\n\n\n\n\nYou might recall that the value_counts() function in the previous note does something similar. It turns out value_counts() and groupby.size() are the same, except value_counts() sorts the resulting Series in descending order automatically.\n\ndf[\"letter\"].value_counts()\n\nletter\nC    3\nA    2\nB    1\nName: count, dtype: int64\n\n\nThese (and other) aggregation functions are so common that pandas allows for writing shorthand. 
Instead of explicitly stating the use of .agg, we can call the function directly on the GroupBy object.\nFor example, the following are equivalent:\n\nelections.groupby(\"Candidate\").agg(mean)\nelections.groupby(\"Candidate\").mean()\n\nThere are many other methods that pandas supports. You can check them out on the pandas documentation.\n\n\n4.3.3 Filtering by Group\nAnother common use for GroupBy objects is to filter data by group.\ngroupby.filter takes an argument func, where func is a function that:\n\nTakes a DataFrame object as input\nReturns a single True or False.\n\ngroupby.filter applies func to each group/sub-DataFrame:\n\nIf func returns True for a group, then all rows belonging to the group are preserved.\nIf func returns False for a group, then all rows belonging to that group are filtered out.\n\nIn other words, sub-DataFrames that correspond to True are returned in the final result, whereas those with a False value are not. Importantly, groupby.filter is different from groupby.agg in that an entire sub-DataFrame is returned in the final DataFrame, not just a single row. As a result, groupby.filter preserves the original indices and the column we grouped on does NOT become the index!\n\nTo illustrate how this happens, let’s go back to the elections dataset. Say we want to identify “tight” election years – that is, we want to find all rows that correspond to election years where all candidates in that year won a similar portion of the total vote. Specifically, let’s find all rows corresponding to a year where no candidate won more than 45% of the total vote.\nIn other words, we want to:\n\nFind the years where the maximum % in that year is less than 45%\nReturn all DataFrame rows that correspond to these years\n\nFor each year, we need to find the maximum % among all rows for that year. 
If this maximum % is lower than 45%, we will tell pandas to keep all rows corresponding to that year.\n\nelections.groupby(\"Year\").filter(lambda sf: sf[\"%\"].max() &lt; 45).head(9)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n23\n1860\nAbraham Lincoln\nRepublican\n1855993\nwin\n39.699408\n\n\n24\n1860\nJohn Bell\nConstitutional Union\n590901\nloss\n12.639283\n\n\n25\n1860\nJohn C. Breckinridge\nSouthern Democratic\n848019\nloss\n18.138998\n\n\n26\n1860\nStephen A. Douglas\nNorthern Democratic\n1380202\nloss\n29.522311\n\n\n66\n1912\nEugene V. Debs\nSocialist\n901551\nloss\n6.004354\n\n\n67\n1912\nEugene W. Chafin\nProhibition\n208156\nloss\n1.386325\n\n\n68\n1912\nTheodore Roosevelt\nProgressive\n4122721\nloss\n27.457433\n\n\n69\n1912\nWilliam Taft\nRepublican\n3486242\nloss\n23.218466\n\n\n70\n1912\nWoodrow Wilson\nDemocratic\n6296284\nwin\n41.933422\n\n\n\n\n\n\n\nWhat’s going on here? In this example, we’ve defined our filtering function, func, to be lambda sf: sf[\"%\"].max() &lt; 45. This filtering function will find the maximum \"%\" value among all entries in the grouped sub-DataFrame, which we call sf. If the maximum value is less than 45, then the filter function will return True and all rows in that grouped sub-DataFrame will appear in the final output DataFrame.\nExamine the DataFrame above. Notice how, in this preview of the first 9 rows, all entries from the years 1860 and 1912 appear. This means that in 1860 and 1912, no candidate in that year won more than 45% of the total vote.\nYou may ask: how is the groupby.filter procedure different to the boolean filtering we’ve seen previously? Boolean filtering considers individual rows when applying a boolean condition. For example, the code elections[elections[\"%\"] &lt; 45] will check the \"%\" value of every single row in elections; if it is less than 45, then that row will be kept in the output. 
groupby.filter, in contrast, applies a boolean condition across all rows in a group. If not all rows in that group satisfy the condition specified by the filter, the entire group will be discarded in the output.\n\n\n4.3.4 Aggregation with lambda Functions\nWhat if we wish to aggregate our DataFrame using a non-standard function – for example, a function of our own design? We can do so by combining .agg with lambda expressions.\nLet’s first consider a puzzle to jog our memory. We will attempt to find the Candidate from each Party with the highest % of votes.\nA naive approach may be to group by the Party column and aggregate by the maximum.\n\nelections.groupby(\"Party\").agg(max).head(10)\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/4278286395.py:1: FutureWarning:\n\nThe provided callable &lt;built-in function max&gt; is currently using DataFrameGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n\n\n\n\n\n\n\n\n\n\nYear\nCandidate\nPopular vote\nResult\n%\n\n\nParty\n\n\n\n\n\n\n\n\n\nAmerican\n1976\nThomas J. Anderson\n873053\nloss\n21.554001\n\n\nAmerican Independent\n1976\nLester Maddox\n9901118\nloss\n13.571218\n\n\nAnti-Masonic\n1832\nWilliam Wirt\n100715\nloss\n7.821583\n\n\nAnti-Monopoly\n1884\nBenjamin Butler\n134294\nloss\n1.335838\n\n\nCitizens\n1980\nBarry Commoner\n233052\nloss\n0.270182\n\n\nCommunist\n1932\nWilliam Z. Foster\n103307\nloss\n0.261069\n\n\nConstitution\n2016\nMichael Peroutka\n203091\nloss\n0.152398\n\n\nConstitutional Union\n1860\nJohn Bell\n590901\nloss\n12.639283\n\n\nDemocratic\n2020\nWoodrow Wilson\n81268924\nwin\n61.344703\n\n\nDemocratic-Republican\n1824\nJohn Quincy Adams\n151271\nwin\n57.210122\n\n\n\n\n\n\n\nThis approach is clearly wrong – the DataFrame claims that Woodrow Wilson won the presidency in 2020.\nWhy is this happening? Here, the max aggregation function is taken over every column independently. 
Among Democrats, max is computing:\n\nThe most recent Year a Democratic candidate ran for president (2020)\nThe Candidate with the alphabetically “largest” name (“Woodrow Wilson”)\nThe Result with the alphabetically “largest” outcome (“win”)\n\nInstead, let’s try a different approach. We will:\n\nSort the DataFrame so that rows are in descending order of %\nGroup by Party and select the first row of each sub-DataFrame\n\nWhile it may seem unintuitive, sorting elections by descending order of % is extremely helpful. If we then group by Party, the first row of each GroupBy object will contain information about the Candidate with the highest voter %.\n\nelections_sorted_by_percent = elections.sort_values(\"%\", ascending=False)\nelections_sorted_by_percent.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n114\n1964\nLyndon Johnson\nDemocratic\n43127041\nwin\n61.344703\n\n\n91\n1936\nFranklin Roosevelt\nDemocratic\n27752648\nwin\n60.978107\n\n\n120\n1972\nRichard Nixon\nRepublican\n47168710\nwin\n60.907806\n\n\n79\n1920\nWarren Harding\nRepublican\n16144093\nwin\n60.574501\n\n\n133\n1984\nRonald Reagan\nRepublican\n54455472\nwin\n59.023326\n\n\n\n\n\n\n\n\nelections_sorted_by_percent.groupby(\"Party\").agg(lambda x : x.iloc[0]).head(10)\n\n# Equivalent to the below code\n# elections_sorted_by_percent.groupby(\"Party\").agg('first').head(10)\n\n\n\n\n\n\n\n\nYear\nCandidate\nPopular vote\nResult\n%\n\n\nParty\n\n\n\n\n\n\n\n\n\nAmerican\n1856\nMillard Fillmore\n873053\nloss\n21.554001\n\n\nAmerican Independent\n1968\nGeorge Wallace\n9901118\nloss\n13.571218\n\n\nAnti-Masonic\n1832\nWilliam Wirt\n100715\nloss\n7.821583\n\n\nAnti-Monopoly\n1884\nBenjamin Butler\n134294\nloss\n1.335838\n\n\nCitizens\n1980\nBarry Commoner\n233052\nloss\n0.270182\n\n\nCommunist\n1932\nWilliam Z. 
Foster\n103307\nloss\n0.261069\n\n\nConstitution\n2008\nChuck Baldwin\n199750\nloss\n0.152398\n\n\nConstitutional Union\n1860\nJohn Bell\n590901\nloss\n12.639283\n\n\nDemocratic\n1964\nLyndon Johnson\n43127041\nwin\n61.344703\n\n\nDemocratic-Republican\n1824\nAndrew Jackson\n151271\nloss\n57.210122\n\n\n\n\n\n\n\nHere’s an illustration of the process:\n\nNotice how our code correctly determines that Lyndon Johnson from the Democratic Party has the highest voter %.\nMore generally, lambda functions are used to design custom aggregation functions that aren’t pre-defined by Python. The input parameter x to the lambda function is a GroupBy object. Therefore, it should make sense why lambda x : x.iloc[0] selects the first row in each groupby object.\nIn fact, there’s a few different ways to approach this problem. Each approach has different tradeoffs in terms of readability, performance, memory consumption, complexity, etc. We’ve given a few examples below.\nNote: Understanding these alternative solutions is not required. 
They are given to demonstrate the vast number of problem-solving approaches in pandas.\n\n# Using the idxmax function\nbest_per_party = elections.loc[elections.groupby('Party')['%'].idxmax()]\nbest_per_party.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n22\n1856\nMillard Fillmore\nAmerican\n873053\nloss\n21.554001\n\n\n115\n1968\nGeorge Wallace\nAmerican Independent\n9901118\nloss\n13.571218\n\n\n6\n1832\nWilliam Wirt\nAnti-Masonic\n100715\nloss\n7.821583\n\n\n38\n1884\nBenjamin Butler\nAnti-Monopoly\n134294\nloss\n1.335838\n\n\n127\n1980\nBarry Commoner\nCitizens\n233052\nloss\n0.270182\n\n\n\n\n\n\n\n\n# Using the .drop_duplicates function\nbest_per_party2 = elections.sort_values('%').drop_duplicates(['Party'], keep='last')\nbest_per_party2.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n148\n1996\nJohn Hagelin\nNatural Law\n113670\nloss\n0.118219\n\n\n164\n2008\nChuck Baldwin\nConstitution\n199750\nloss\n0.152398\n\n\n110\n1956\nT. Coleman Andrews\nStates' Rights\n107929\nloss\n0.174883\n\n\n147\n1996\nHoward Phillips\nTaxpayers\n184656\nloss\n0.192045\n\n\n136\n1988\nLenora Fulani\nNew Alliance\n217221\nloss\n0.237804",
+    "text": "4.3 .groupby(), Continued\nWe’ll work with the elections DataFrame again.\n\n\nCode\nimport pandas as pd\nimport numpy as np\n\nelections = pd.read_csv(\"data/elections.csv\")\nelections.head(5)\n\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n0\n1824\nAndrew Jackson\nDemocratic-Republican\n151271\nloss\n57.210122\n\n\n1\n1824\nJohn Quincy Adams\nDemocratic-Republican\n113142\nwin\n42.789878\n\n\n2\n1828\nAndrew Jackson\nDemocratic\n642806\nwin\n56.203927\n\n\n3\n1828\nJohn Quincy Adams\nNational Republican\n500897\nloss\n43.796073\n\n\n4\n1832\nAndrew Jackson\nDemocratic\n702735\nwin\n54.574789\n\n\n\n\n\n\n\n\n4.3.1 Raw GroupBy Objects\nThe result of groupby applied to a DataFrame is a DataFrameGroupBy object, not a DataFrame.\n\ngrouped_by_year = elections.groupby(\"Year\")\ntype(grouped_by_year)\n\npandas.core.groupby.generic.DataFrameGroupBy\n\n\nThere are several ways to look into DataFrameGroupBy objects:\n\ngrouped_by_party = elections.groupby(\"Party\")\ngrouped_by_party.groups\n\n{'American': [22, 126], 'American Independent': [115, 119, 124], 'Anti-Masonic': [6], 'Anti-Monopoly': [38], 'Citizens': [127], 'Communist': [89], 'Constitution': [160, 164, 172], 'Constitutional Union': [24], 'Democratic': [2, 4, 8, 10, 13, 14, 17, 20, 28, 29, 34, 37, 39, 45, 47, 52, 55, 57, 64, 70, 74, 77, 81, 83, 86, 91, 94, 97, 100, 105, 108, 111, 114, 116, 118, 123, 129, 134, 137, 140, 144, 151, 158, 162, 168, 176, 178], 'Democratic-Republican': [0, 1], 'Dixiecrat': [103], 'Farmer–Labor': [78], 'Free Soil': [15, 18], 'Green': [149, 155, 156, 165, 170, 177, 181], 'Greenback': [35], 'Independent': [121, 130, 143, 161, 167, 174], 'Liberal Republican': [31], 'Libertarian': [125, 128, 132, 138, 139, 146, 153, 159, 163, 169, 175, 180], 'National Democratic': [50], 'National Republican': [3, 5], 'National Union': [27], 'Natural Law': [148], 'New Alliance': [136], 'Northern Democratic': [26], 'Populist': [48, 61, 141], 'Progressive': [68, 82, 
101, 107], 'Prohibition': [41, 44, 49, 51, 54, 59, 63, 67, 73, 75, 99], 'Reform': [150, 154], 'Republican': [21, 23, 30, 32, 33, 36, 40, 43, 46, 53, 56, 60, 65, 69, 72, 79, 80, 84, 87, 90, 96, 98, 104, 106, 109, 112, 113, 117, 120, 122, 131, 133, 135, 142, 145, 152, 157, 166, 171, 173, 179], 'Socialist': [58, 62, 66, 71, 76, 85, 88, 92, 95, 102], 'Southern Democratic': [25], 'States' Rights': [110], 'Taxpayers': [147], 'Union': [93], 'Union Labor': [42], 'Whig': [7, 9, 11, 12, 16, 19]}\n\n\n\ngrouped_by_party.get_group(\"Socialist\")\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n58\n1904\nEugene V. Debs\nSocialist\n402810\nloss\n2.985897\n\n\n62\n1908\nEugene V. Debs\nSocialist\n420852\nloss\n2.850866\n\n\n66\n1912\nEugene V. Debs\nSocialist\n901551\nloss\n6.004354\n\n\n71\n1916\nAllan L. Benson\nSocialist\n590524\nloss\n3.194193\n\n\n76\n1920\nEugene V. Debs\nSocialist\n913693\nloss\n3.428282\n\n\n85\n1928\nNorman Thomas\nSocialist\n267478\nloss\n0.728623\n\n\n88\n1932\nNorman Thomas\nSocialist\n884885\nloss\n2.236211\n\n\n92\n1936\nNorman Thomas\nSocialist\n187910\nloss\n0.412876\n\n\n95\n1940\nNorman Thomas\nSocialist\n116599\nloss\n0.234237\n\n\n102\n1948\nNorman Thomas\nSocialist\n139569\nloss\n0.286312\n\n\n\n\n\n\n\n\n\n4.3.2 Other GroupBy Methods\nThere are many aggregation methods we can use with .agg. 
Some useful options are:\n\n.mean: creates a new DataFrame with the mean value of each group\n.sum: creates a new DataFrame with the sum of each group\n.max and .min: creates a new DataFrame with the maximum/minimum value of each group\n.first and .last: creates a new DataFrame with the first/last row in each group\n.size: creates a new Series with the number of entries in each group\n.count: creates a new DataFrame with the number of entries, excluding missing values.\n\nLet’s illustrate some examples by creating a DataFrame called df.\n\ndf = pd.DataFrame({'letter':['A','A','B','C','C','C'], \n                   'num':[1,2,3,4,np.nan,4], \n                   'state':[np.nan, 'tx', 'fl', 'hi', np.nan, 'ak']})\ndf\n\n\n\n\n\n\n\n\nletter\nnum\nstate\n\n\n\n\n0\nA\n1.0\nNaN\n\n\n1\nA\n2.0\ntx\n\n\n2\nB\n3.0\nfl\n\n\n3\nC\n4.0\nhi\n\n\n4\nC\nNaN\nNaN\n\n\n5\nC\n4.0\nak\n\n\n\n\n\n\n\nNote the slight difference between .size() and .count(): while .size() returns a Series and counts the number of entries including the missing values, .count() returns a DataFrame and counts the number of entries in each column excluding missing values.\n\ndf.groupby(\"letter\").size()\n\nletter\nA    2\nB    1\nC    3\ndtype: int64\n\n\n\ndf.groupby(\"letter\").count()\n\n\n\n\n\n\n\n\nnum\nstate\n\n\nletter\n\n\n\n\n\n\nA\n2\n1\n\n\nB\n1\n1\n\n\nC\n2\n2\n\n\n\n\n\n\n\nYou might recall that the value_counts() function in the previous note does something similar. It turns out value_counts() and groupby.size() are the same, except value_counts() sorts the resulting Series in descending order automatically.\n\ndf[\"letter\"].value_counts()\n\nletter\nC    3\nA    2\nB    1\nName: count, dtype: int64\n\n\nThese (and other) aggregation functions are so common that pandas allows for writing shorthand. 
Instead of explicitly stating the use of .agg, we can call the function directly on the GroupBy object.\nFor example, the following are equivalent:\n\nelections.groupby(\"Candidate\").agg(mean)\nelections.groupby(\"Candidate\").mean()\n\nThere are many other methods that pandas supports. You can check them out on the pandas documentation.\n\n\n4.3.3 Filtering by Group\nAnother common use for GroupBy objects is to filter data by group.\ngroupby.filter takes an argument func, where func is a function that:\n\nTakes a DataFrame object as input\nReturns a single True or False.\n\ngroupby.filter applies func to each group/sub-DataFrame:\n\nIf func returns True for a group, then all rows belonging to the group are preserved.\nIf func returns False for a group, then all rows belonging to that group are filtered out.\n\nIn other words, sub-DataFrames that correspond to True are returned in the final result, whereas those with a False value are not. Importantly, groupby.filter is different from groupby.agg in that an entire sub-DataFrame is returned in the final DataFrame, not just a single row. As a result, groupby.filter preserves the original indices and the column we grouped on does NOT become the index!\n\nTo illustrate how this happens, let’s go back to the elections dataset. Say we want to identify “tight” election years – that is, we want to find all rows that correspond to election years where all candidates in that year won a similar portion of the total vote. Specifically, let’s find all rows corresponding to a year where no candidate won more than 45% of the total vote.\nIn other words, we want to:\n\nFind the years where the maximum % in that year is less than 45%\nReturn all DataFrame rows that correspond to these years\n\nFor each year, we need to find the maximum % among all rows for that year. 
If this maximum % is lower than 45%, we will tell pandas to keep all rows corresponding to that year.\n\nelections.groupby(\"Year\").filter(lambda sf: sf[\"%\"].max() &lt; 45).head(9)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n23\n1860\nAbraham Lincoln\nRepublican\n1855993\nwin\n39.699408\n\n\n24\n1860\nJohn Bell\nConstitutional Union\n590901\nloss\n12.639283\n\n\n25\n1860\nJohn C. Breckinridge\nSouthern Democratic\n848019\nloss\n18.138998\n\n\n26\n1860\nStephen A. Douglas\nNorthern Democratic\n1380202\nloss\n29.522311\n\n\n66\n1912\nEugene V. Debs\nSocialist\n901551\nloss\n6.004354\n\n\n67\n1912\nEugene W. Chafin\nProhibition\n208156\nloss\n1.386325\n\n\n68\n1912\nTheodore Roosevelt\nProgressive\n4122721\nloss\n27.457433\n\n\n69\n1912\nWilliam Taft\nRepublican\n3486242\nloss\n23.218466\n\n\n70\n1912\nWoodrow Wilson\nDemocratic\n6296284\nwin\n41.933422\n\n\n\n\n\n\n\nWhat’s going on here? In this example, we’ve defined our filtering function, func, to be lambda sf: sf[\"%\"].max() &lt; 45. This filtering function will find the maximum \"%\" value among all entries in the grouped sub-DataFrame, which we call sf. If the maximum value is less than 45, then the filter function will return True and all rows in that grouped sub-DataFrame will appear in the final output DataFrame.\nExamine the DataFrame above. Notice how, in this preview of the first 9 rows, all entries from the years 1860 and 1912 appear. This means that in 1860 and 1912, no candidate in that year won more than 45% of the total vote.\nYou may ask: how is the groupby.filter procedure different to the boolean filtering we’ve seen previously? Boolean filtering considers individual rows when applying a boolean condition. For example, the code elections[elections[\"%\"] &lt; 45] will check the \"%\" value of every single row in elections; if it is less than 45, then that row will be kept in the output. 
groupby.filter, in contrast, applies a boolean condition across all rows in a group. If not all rows in that group satisfy the condition specified by the filter, the entire group will be discarded in the output.\n\n\n4.3.4 Aggregation with lambda Functions\nWhat if we wish to aggregate our DataFrame using a non-standard function – for example, a function of our own design? We can do so by combining .agg with lambda expressions.\nLet’s first consider a puzzle to jog our memory. We will attempt to find the Candidate from each Party with the highest % of votes.\nA naive approach may be to group by the Party column and aggregate by the maximum.\n\nelections.groupby(\"Party\").agg(max).head(10)\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/4278286395.py:1: FutureWarning:\n\nThe provided callable &lt;built-in function max&gt; is currently using DataFrameGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"max\" instead.\n\n\n\n\n\n\n\n\n\n\nYear\nCandidate\nPopular vote\nResult\n%\n\n\nParty\n\n\n\n\n\n\n\n\n\nAmerican\n1976\nThomas J. Anderson\n873053\nloss\n21.554001\n\n\nAmerican Independent\n1976\nLester Maddox\n9901118\nloss\n13.571218\n\n\nAnti-Masonic\n1832\nWilliam Wirt\n100715\nloss\n7.821583\n\n\nAnti-Monopoly\n1884\nBenjamin Butler\n134294\nloss\n1.335838\n\n\nCitizens\n1980\nBarry Commoner\n233052\nloss\n0.270182\n\n\nCommunist\n1932\nWilliam Z. Foster\n103307\nloss\n0.261069\n\n\nConstitution\n2016\nMichael Peroutka\n203091\nloss\n0.152398\n\n\nConstitutional Union\n1860\nJohn Bell\n590901\nloss\n12.639283\n\n\nDemocratic\n2020\nWoodrow Wilson\n81268924\nwin\n61.344703\n\n\nDemocratic-Republican\n1824\nJohn Quincy Adams\n151271\nwin\n57.210122\n\n\n\n\n\n\n\nThis approach is clearly wrong – the DataFrame claims that Woodrow Wilson won the presidency in 2020.\nWhy is this happening? Here, the max aggregation function is taken over every column independently. 
Among Democrats, max is computing:\n\nThe most recent Year a Democratic candidate ran for president (2020)\nThe Candidate with the alphabetically “largest” name (“Woodrow Wilson”)\nThe Result with the alphabetically “largest” outcome (“win”)\n\nInstead, let’s try a different approach. We will:\n\nSort the DataFrame so that rows are in descending order of %\nGroup by Party and select the first row of each sub-DataFrame\n\nWhile it may seem unintuitive, sorting elections by descending order of % is extremely helpful. If we then group by Party, the first row of each GroupBy object will contain information about the Candidate with the highest voter %.\n\nelections_sorted_by_percent = elections.sort_values(\"%\", ascending=False)\nelections_sorted_by_percent.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n114\n1964\nLyndon Johnson\nDemocratic\n43127041\nwin\n61.344703\n\n\n91\n1936\nFranklin Roosevelt\nDemocratic\n27752648\nwin\n60.978107\n\n\n120\n1972\nRichard Nixon\nRepublican\n47168710\nwin\n60.907806\n\n\n79\n1920\nWarren Harding\nRepublican\n16144093\nwin\n60.574501\n\n\n133\n1984\nRonald Reagan\nRepublican\n54455472\nwin\n59.023326\n\n\n\n\n\n\n\n\nelections_sorted_by_percent.groupby(\"Party\").agg(lambda x : x.iloc[0]).head(10)\n\n# Equivalent to the below code\n# elections_sorted_by_percent.groupby(\"Party\").agg('first').head(10)\n\n\n\n\n\n\n\n\nYear\nCandidate\nPopular vote\nResult\n%\n\n\nParty\n\n\n\n\n\n\n\n\n\nAmerican\n1856\nMillard Fillmore\n873053\nloss\n21.554001\n\n\nAmerican Independent\n1968\nGeorge Wallace\n9901118\nloss\n13.571218\n\n\nAnti-Masonic\n1832\nWilliam Wirt\n100715\nloss\n7.821583\n\n\nAnti-Monopoly\n1884\nBenjamin Butler\n134294\nloss\n1.335838\n\n\nCitizens\n1980\nBarry Commoner\n233052\nloss\n0.270182\n\n\nCommunist\n1932\nWilliam Z. 
Foster\n103307\nloss\n0.261069\n\n\nConstitution\n2008\nChuck Baldwin\n199750\nloss\n0.152398\n\n\nConstitutional Union\n1860\nJohn Bell\n590901\nloss\n12.639283\n\n\nDemocratic\n1964\nLyndon Johnson\n43127041\nwin\n61.344703\n\n\nDemocratic-Republican\n1824\nAndrew Jackson\n151271\nloss\n57.210122\n\n\n\n\n\n\n\nHere’s an illustration of the process:\n\nNotice how our code correctly determines that Lyndon Johnson from the Democratic Party has the highest voter %.\nMore generally, lambda functions are used to design custom aggregation functions that aren’t pre-defined by Python. The input parameter x to the lambda function is a GroupBy object. Therefore, it should make sense why lambda x : x.iloc[0] selects the first row in each groupby object.\nIn fact, there’s a few different ways to approach this problem. Each approach has different tradeoffs in terms of readability, performance, memory consumption, complexity, etc. We’ve given a few examples below.\nNote: Understanding these alternative solutions is not required. 
They are given to demonstrate the vast number of problem-solving approaches in pandas.\n\n# Using the idxmax function\nbest_per_party = elections.loc[elections.groupby('Party')['%'].idxmax()]\nbest_per_party.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n22\n1856\nMillard Fillmore\nAmerican\n873053\nloss\n21.554001\n\n\n115\n1968\nGeorge Wallace\nAmerican Independent\n9901118\nloss\n13.571218\n\n\n6\n1832\nWilliam Wirt\nAnti-Masonic\n100715\nloss\n7.821583\n\n\n38\n1884\nBenjamin Butler\nAnti-Monopoly\n134294\nloss\n1.335838\n\n\n127\n1980\nBarry Commoner\nCitizens\n233052\nloss\n0.270182\n\n\n\n\n\n\n\n\n# Using the .drop_duplicates function\nbest_per_party2 = elections.sort_values('%').drop_duplicates(['Party'], keep='last')\nbest_per_party2.head(5)\n\n\n\n\n\n\n\n\nYear\nCandidate\nParty\nPopular vote\nResult\n%\n\n\n\n\n148\n1996\nJohn Hagelin\nNatural Law\n113670\nloss\n0.118219\n\n\n164\n2008\nChuck Baldwin\nConstitution\n199750\nloss\n0.152398\n\n\n110\n1956\nT. Coleman Andrews\nStates' Rights\n107929\nloss\n0.174883\n\n\n147\n1996\nHoward Phillips\nTaxpayers\n184656\nloss\n0.192045\n\n\n136\n1988\nLenora Fulani\nNew Alliance\n217221\nloss\n0.237804",
     "crumbs": [
       "<span class='chapter-number'>4</span>  <span class='chapter-title'>Pandas III</span>"
     ]
@@ -204,7 +204,7 @@
     "href": "pandas_3/pandas_3.html#aggregating-data-with-pivot-tables",
     "title": "4  Pandas III",
     "section": "4.4 Aggregating Data with Pivot Tables",
-    "text": "4.4 Aggregating Data with Pivot Tables\nWe know now that .groupby gives us the ability to group and aggregate data across our DataFrame. The examples above formed groups using just one column in the DataFrame. It’s possible to group by multiple columns at once by passing in a list of column names to .groupby.\nLet’s consider the babynames dataset again. In this problem, we will find the total number of baby names associated with each sex for each year. To do this, we’ll group by both the \"Year\" and \"Sex\" columns.\n\nbabynames.head()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\nFirst Letter\n\n\n\n\n115957\nCA\nF\n1990\nDeandrea\n5\nD\n\n\n101976\nCA\nF\n1986\nDeandrea\n6\nD\n\n\n131029\nCA\nF\n1994\nLeandrea\n5\nL\n\n\n108731\nCA\nF\n1988\nDeandrea\n5\nD\n\n\n308131\nCA\nM\n1985\nDeandrea\n6\nD\n\n\n\n\n\n\n\n\n# Find the total number of baby names associated with each sex for each \n# year in the data\nbabynames.groupby([\"Year\", \"Sex\"])[[\"Count\"]].agg(sum).head(6)\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48366/3186035650.py:3: FutureWarning:\n\nThe provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n\n\n\n\n\n\n\n\n\n\n\nCount\n\n\nYear\nSex\n\n\n\n\n\n1910\nF\n5950\n\n\nM\n3213\n\n\n1911\nF\n6602\n\n\nM\n3381\n\n\n1912\nF\n9804\n\n\nM\n8142\n\n\n\n\n\n\n\nNotice that both \"Year\" and \"Sex\" serve as the index of the DataFrame (they are both rendered in bold). We’ve created a multi-index DataFrame where two different index values, the year and sex, are used to uniquely identify each row.\nThis isn’t the most intuitive way of representing this data – and, because multi-indexed DataFrames have multiple dimensions in their index, they can often be difficult to use.\nAnother strategy to aggregate across two columns is to create a pivot table. 
You saw these back in Data 8. One set of values is used to create the index of the pivot table; another set is used to define the column names. The values contained in each cell of the table correspond to the aggregated data for each index-column pair.\nHere’s an illustration of the process:\n\nThe best way to understand pivot tables is to see one in action. Let’s return to our original goal of summing the total number of names associated with each combination of year and sex. We’ll call the pandas .pivot_table method to create a new table.\n\n# The `pivot_table` method is used to generate a Pandas pivot table\nimport numpy as np\nbabynames.pivot_table(\n    index = \"Year\",\n    columns = \"Sex\",    \n    values = \"Count\", \n    aggfunc = \"sum\", \n).head(5)\n\n\n\n\n\n\n\nSex\nF\nM\n\n\nYear\n\n\n\n\n\n\n1910\n5950\n3213\n\n\n1911\n6602\n3381\n\n\n1912\n9804\n8142\n\n\n1913\n11860\n10234\n\n\n1914\n13815\n13111\n\n\n\n\n\n\n\nLooks a lot better! Now, our DataFrame is structured with clear index-column combinations. Each entry in the pivot table represents the summed count of names for a given combination of \"Year\" and \"Sex\".\nLet’s take a closer look at the code implemented above.\n\nindex = \"Year\" specifies the column name in the original DataFrame that should be used as the index of the pivot table\ncolumns = \"Sex\" specifies the column name in the original DataFrame that should be used to generate the columns of the pivot table\nvalues = \"Count\" indicates what values from the original DataFrame should be used to populate the entry for each index-column combination\naggfunc = np.sum tells pandas what function to use when aggregating the data specified by values. 
Here, we are summing the name counts for each pair of \"Year\" and \"Sex\"\n\nWe can even include multiple values in the index or columns of our pivot tables.\n\nbabynames_pivot = babynames.pivot_table(\n    index=\"Year\",     # the rows (turned into index)\n    columns=\"Sex\",    # the column values\n    values=[\"Count\", \"Name\"], \n    aggfunc=\"max\",      # group operation\n)\nbabynames_pivot.head(6)\n\n\n\n\n\n\n\n\nCount\nName\n\n\nSex\nF\nM\nF\nM\n\n\nYear\n\n\n\n\n\n\n\n\n1910\n295\n237\nYvonne\nWilliam\n\n\n1911\n390\n214\nZelma\nWillis\n\n\n1912\n534\n501\nYvonne\nWoodrow\n\n\n1913\n584\n614\nZelma\nYoshio\n\n\n1914\n773\n769\nZelma\nYoshio\n\n\n1915\n998\n1033\nZita\nYukio\n\n\n\n\n\n\n\nNote that each row provides the number of girls and number of boys having that year’s most common name, and also lists the alphabetically largest girl name and boy name. The counts for number of girls/boys in the resulting DataFrame do not correspond to the names listed. For example, in 1910, the most popular girl name is given to 295 girls, but that name was likely not Yvonne.",
+    "text": "4.4 Aggregating Data with Pivot Tables\nWe know now that .groupby gives us the ability to group and aggregate data across our DataFrame. The examples above formed groups using just one column in the DataFrame. It’s possible to group by multiple columns at once by passing in a list of column names to .groupby.\nLet’s consider the babynames dataset again. In this problem, we will find the total number of baby names associated with each sex for each year. To do this, we’ll group by both the \"Year\" and \"Sex\" columns.\n\nbabynames.head()\n\n\n\n\n\n\n\n\nState\nSex\nYear\nName\nCount\nFirst Letter\n\n\n\n\n115957\nCA\nF\n1990\nDeandrea\n5\nD\n\n\n101976\nCA\nF\n1986\nDeandrea\n6\nD\n\n\n131029\nCA\nF\n1994\nLeandrea\n5\nL\n\n\n108731\nCA\nF\n1988\nDeandrea\n5\nD\n\n\n308131\nCA\nM\n1985\nDeandrea\n6\nD\n\n\n\n\n\n\n\n\n# Find the total number of baby names associated with each sex for each \n# year in the data\nbabynames.groupby([\"Year\", \"Sex\"])[[\"Count\"]].agg(sum).head(6)\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51669/3186035650.py:3: FutureWarning:\n\nThe provided callable &lt;built-in function sum&gt; is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n\n\n\n\n\n\n\n\n\n\n\nCount\n\n\nYear\nSex\n\n\n\n\n\n1910\nF\n5950\n\n\nM\n3213\n\n\n1911\nF\n6602\n\n\nM\n3381\n\n\n1912\nF\n9804\n\n\nM\n8142\n\n\n\n\n\n\n\nNotice that both \"Year\" and \"Sex\" serve as the index of the DataFrame (they are both rendered in bold). We’ve created a multi-index DataFrame where two different index values, the year and sex, are used to uniquely identify each row.\nThis isn’t the most intuitive way of representing this data – and, because multi-indexed DataFrames have multiple dimensions in their index, they can often be difficult to use.\nAnother strategy to aggregate across two columns is to create a pivot table. 
You saw these back in Data 8. One set of values is used to create the index of the pivot table; another set is used to define the column names. The values contained in each cell of the table correspond to the aggregated data for each index-column pair.\nHere’s an illustration of the process:\n\nThe best way to understand pivot tables is to see one in action. Let’s return to our original goal of summing the total number of names associated with each combination of year and sex. We’ll call the pandas .pivot_table method to create a new table.\n\n# The `pivot_table` method is used to generate a Pandas pivot table\nimport numpy as np\nbabynames.pivot_table(\n    index = \"Year\",\n    columns = \"Sex\",    \n    values = \"Count\", \n    aggfunc = \"sum\", \n).head(5)\n\n\n\n\n\n\n\nSex\nF\nM\n\n\nYear\n\n\n\n\n\n\n1910\n5950\n3213\n\n\n1911\n6602\n3381\n\n\n1912\n9804\n8142\n\n\n1913\n11860\n10234\n\n\n1914\n13815\n13111\n\n\n\n\n\n\n\nLooks a lot better! Now, our DataFrame is structured with clear index-column combinations. Each entry in the pivot table represents the summed count of names for a given combination of \"Year\" and \"Sex\".\nLet’s take a closer look at the code implemented above.\n\nindex = \"Year\" specifies the column name in the original DataFrame that should be used as the index of the pivot table\ncolumns = \"Sex\" specifies the column name in the original DataFrame that should be used to generate the columns of the pivot table\nvalues = \"Count\" indicates what values from the original DataFrame should be used to populate the entry for each index-column combination\naggfunc = np.sum tells pandas what function to use when aggregating the data specified by values. 
Here, we are summing the name counts for each pair of \"Year\" and \"Sex\"\n\nWe can even include multiple values in the index or columns of our pivot tables.\n\nbabynames_pivot = babynames.pivot_table(\n    index=\"Year\",     # the rows (turned into index)\n    columns=\"Sex\",    # the column values\n    values=[\"Count\", \"Name\"], \n    aggfunc=\"max\",      # group operation\n)\nbabynames_pivot.head(6)\n\n\n\n\n\n\n\n\nCount\nName\n\n\nSex\nF\nM\nF\nM\n\n\nYear\n\n\n\n\n\n\n\n\n1910\n295\n237\nYvonne\nWilliam\n\n\n1911\n390\n214\nZelma\nWillis\n\n\n1912\n534\n501\nYvonne\nWoodrow\n\n\n1913\n584\n614\nZelma\nYoshio\n\n\n1914\n773\n769\nZelma\nYoshio\n\n\n1915\n998\n1033\nZita\nYukio\n\n\n\n\n\n\n\nNote that each row provides the number of girls and number of boys having that year’s most common name, and also lists the alphabetically largest girl name and boy name. The counts for number of girls/boys in the resulting DataFrame do not correspond to the names listed. For example, in 1910, the most popular girl name is given to 295 girls, but that name was likely not Yvonne.",
     "crumbs": [
       "<span class='chapter-number'>4</span>  <span class='chapter-title'>Pandas III</span>"
     ]
@@ -254,7 +254,7 @@
     "href": "eda/eda.html#granularity-scope-and-temporality",
     "title": "5  Data Cleaning and EDA",
     "section": "5.2 Granularity, Scope, and Temporality",
-    "text": "5.2 Granularity, Scope, and Temporality\nAfter understanding the structure of the dataset, the next task is to determine what exactly the data represents. We’ll do so by considering the data’s granularity, scope, and temporality.\n\n5.2.1 Granularity\nThe granularity of a dataset is what a single row represents. You can also think of it as the level of detail included in the data. To determine the data’s granularity, ask: what does each row in the dataset represent? Fine-grained data contains a high level of detail, with a single row representing a small individual unit. For example, each record may represent one person. Coarse-grained data is encoded such that a single row represents a large individual unit – for example, each record may represent a group of people.\n\n\n5.2.2 Scope\nThe scope of a dataset is the subset of the population covered by the data. If we were investigating student performance in Data Science courses, a dataset with a narrow scope might encompass all students enrolled in Data 100 whereas a dataset with an expansive scope might encompass all students in California.\n\n\n5.2.3 Temporality\nThe temporality of a dataset describes the periodicity over which the data was collected as well as when the data was most recently collected or updated.\nTime and date fields of a dataset could represent a few things:\n\nwhen the “event” happened\nwhen the data was collected, or when it was entered into the system\nwhen the data was copied into the database\n\nTo fully understand the temporality of the data, it also may be necessary to standardize time zones or inspect recurring time-based trends in the data (do patterns recur in 24-hour periods? Over the course of a month? Seasonally?). The convention for standardizing time is the Coordinated Universal Time (UTC), an international time standard measured at 0 degrees latitude that stays consistent throughout the year (no daylight savings). 
We can represent Berkeley’s time zone, Pacific Standard Time (PST), as UTC-7 (with daylight savings).\n\n5.2.3.1 Temporality with pandas’ dt accessors\nLet’s briefly look at how we can use pandas’ dt accessors to work with dates/times in a dataset using the dataset you’ll see in Lab 3: the Berkeley PD Calls for Service dataset.\n\n\nCode\ncalls = pd.read_csv(\"data/Berkeley_PD_-_Calls_for_Service.csv\")\ncalls.head()\n\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n0\n21014296\nTHEFT MISD. (UNDER $950)\n04/01/2021 12:00:00 AM\n10:58\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n1\n21014391\nTHEFT MISD. (UNDER $950)\n04/01/2021 12:00:00 AM\n10:38\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n2\n21090494\nTHEFT MISD. (UNDER $950)\n04/19/2021 12:00:00 AM\n12:15\nLARCENY\n1\n06/15/2021 12:00:00 AM\n2100 BLOCK HASTE ST\\nBerkeley, CA\\n(37.864908,...\n2100 BLOCK HASTE ST\nBerkeley\nCA\n\n\n3\n21090204\nTHEFT FELONY (OVER $950)\n02/13/2021 12:00:00 AM\n17:00\nLARCENY\n6\n06/15/2021 12:00:00 AM\n2600 BLOCK WARRING ST\\nBerkeley, CA\\n(37.86393...\n2600 BLOCK WARRING ST\nBerkeley\nCA\n\n\n4\n21090179\nBURGLARY AUTO\n02/08/2021 12:00:00 AM\n6:20\nBURGLARY - VEHICLE\n1\n06/15/2021 12:00:00 AM\n2700 BLOCK GARBER ST\\nBerkeley, CA\\n(37.86066,...\n2700 BLOCK GARBER ST\nBerkeley\nCA\n\n\n\n\n\n\n\nLooks like there are three columns with dates/times: EVENTDT, EVENTTM, and InDbDate.\nMost likely, EVENTDT stands for the date when the event took place, EVENTTM stands for the time of day the event took place (in 24-hr format), and InDbDate is the date this call is recorded onto the database.\nIf we check the data type of these columns, we will see they are stored as strings. 
We can convert them to datetime objects using pandas to_datetime function.\n\ncalls[\"EVENTDT\"] = pd.to_datetime(calls[\"EVENTDT\"])\ncalls.head()\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48380/874729699.py:1: UserWarning:\n\nCould not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n\n\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n0\n21014296\nTHEFT MISD. (UNDER $950)\n2021-04-01\n10:58\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n1\n21014391\nTHEFT MISD. (UNDER $950)\n2021-04-01\n10:38\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n2\n21090494\nTHEFT MISD. (UNDER $950)\n2021-04-19\n12:15\nLARCENY\n1\n06/15/2021 12:00:00 AM\n2100 BLOCK HASTE ST\\nBerkeley, CA\\n(37.864908,...\n2100 BLOCK HASTE ST\nBerkeley\nCA\n\n\n3\n21090204\nTHEFT FELONY (OVER $950)\n2021-02-13\n17:00\nLARCENY\n6\n06/15/2021 12:00:00 AM\n2600 BLOCK WARRING ST\\nBerkeley, CA\\n(37.86393...\n2600 BLOCK WARRING ST\nBerkeley\nCA\n\n\n4\n21090179\nBURGLARY AUTO\n2021-02-08\n6:20\nBURGLARY - VEHICLE\n1\n06/15/2021 12:00:00 AM\n2700 BLOCK GARBER ST\\nBerkeley, CA\\n(37.86066,...\n2700 BLOCK GARBER ST\nBerkeley\nCA\n\n\n\n\n\n\n\nNow, we can use the dt accessor on this column.\nWe can get the month:\n\ncalls[\"EVENTDT\"].dt.month.head()\n\n0    4\n1    4\n2    4\n3    2\n4    2\nName: EVENTDT, dtype: int32\n\n\nWhich day of the week the date is on:\n\ncalls[\"EVENTDT\"].dt.dayofweek.head()\n\n0    3\n1    3\n2    0\n3    5\n4    0\nName: EVENTDT, dtype: int32\n\n\nCheck the mimimum values to see if there are any suspicious-looking, 70s 
dates:\n\ncalls.sort_values(\"EVENTDT\").head()\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n2513\n20057398\nBURGLARY COMMERCIAL\n2020-12-17\n16:05\nBURGLARY - COMMERCIAL\n4\n06/15/2021 12:00:00 AM\n600 BLOCK GILMAN ST\\nBerkeley, CA\\n(37.878405,...\n600 BLOCK GILMAN ST\nBerkeley\nCA\n\n\n624\n20057207\nASSAULT/BATTERY MISD.\n2020-12-17\n16:50\nASSAULT\n4\n06/15/2021 12:00:00 AM\n2100 BLOCK SHATTUCK AVE\\nBerkeley, CA\\n(37.871...\n2100 BLOCK SHATTUCK AVE\nBerkeley\nCA\n\n\n154\n20092214\nTHEFT FROM AUTO\n2020-12-17\n18:30\nLARCENY - FROM VEHICLE\n4\n06/15/2021 12:00:00 AM\n800 BLOCK SHATTUCK AVE\\nBerkeley, CA\\n(37.8918...\n800 BLOCK SHATTUCK AVE\nBerkeley\nCA\n\n\n659\n20057324\nTHEFT MISD. (UNDER $950)\n2020-12-17\n15:44\nLARCENY\n4\n06/15/2021 12:00:00 AM\n1800 BLOCK 4TH ST\\nBerkeley, CA\\n(37.869888, -...\n1800 BLOCK 4TH ST\nBerkeley\nCA\n\n\n993\n20057573\nBURGLARY RESIDENTIAL\n2020-12-17\n22:15\nBURGLARY - RESIDENTIAL\n4\n06/15/2021 12:00:00 AM\n1700 BLOCK STUART ST\\nBerkeley, CA\\n(37.857495...\n1700 BLOCK STUART ST\nBerkeley\nCA\n\n\n\n\n\n\n\nDoesn’t look like it! We are good!\nWe can also do many things with the dt accessor like switching time zones and converting time back to UNIX/POSIX time. Check out the documentation on .dt accessor and time series/date functionality.",
+    "text": "5.2 Granularity, Scope, and Temporality\nAfter understanding the structure of the dataset, the next task is to determine what exactly the data represents. We’ll do so by considering the data’s granularity, scope, and temporality.\n\n5.2.1 Granularity\nThe granularity of a dataset is what a single row represents. You can also think of it as the level of detail included in the data. To determine the data’s granularity, ask: what does each row in the dataset represent? Fine-grained data contains a high level of detail, with a single row representing a small individual unit. For example, each record may represent one person. Coarse-grained data is encoded such that a single row represents a large individual unit – for example, each record may represent a group of people.\n\n\n5.2.2 Scope\nThe scope of a dataset is the subset of the population covered by the data. If we were investigating student performance in Data Science courses, a dataset with a narrow scope might encompass all students enrolled in Data 100 whereas a dataset with an expansive scope might encompass all students in California.\n\n\n5.2.3 Temporality\nThe temporality of a dataset describes the periodicity over which the data was collected as well as when the data was most recently collected or updated.\nTime and date fields of a dataset could represent a few things:\n\nwhen the “event” happened\nwhen the data was collected, or when it was entered into the system\nwhen the data was copied into the database\n\nTo fully understand the temporality of the data, it also may be necessary to standardize time zones or inspect recurring time-based trends in the data (do patterns recur in 24-hour periods? Over the course of a month? Seasonally?). The convention for standardizing time is the Coordinated Universal Time (UTC), an international time standard measured at 0 degrees latitude that stays consistent throughout the year (no daylight savings). 
We can represent Berkeley’s time zone, Pacific Standard Time (PST), as UTC-7 (with daylight savings).\n\n5.2.3.1 Temporality with pandas’ dt accessors\nLet’s briefly look at how we can use pandas’ dt accessors to work with dates/times in a dataset using the dataset you’ll see in Lab 3: the Berkeley PD Calls for Service dataset.\n\n\nCode\ncalls = pd.read_csv(\"data/Berkeley_PD_-_Calls_for_Service.csv\")\ncalls.head()\n\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n0\n21014296\nTHEFT MISD. (UNDER $950)\n04/01/2021 12:00:00 AM\n10:58\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n1\n21014391\nTHEFT MISD. (UNDER $950)\n04/01/2021 12:00:00 AM\n10:38\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n2\n21090494\nTHEFT MISD. (UNDER $950)\n04/19/2021 12:00:00 AM\n12:15\nLARCENY\n1\n06/15/2021 12:00:00 AM\n2100 BLOCK HASTE ST\\nBerkeley, CA\\n(37.864908,...\n2100 BLOCK HASTE ST\nBerkeley\nCA\n\n\n3\n21090204\nTHEFT FELONY (OVER $950)\n02/13/2021 12:00:00 AM\n17:00\nLARCENY\n6\n06/15/2021 12:00:00 AM\n2600 BLOCK WARRING ST\\nBerkeley, CA\\n(37.86393...\n2600 BLOCK WARRING ST\nBerkeley\nCA\n\n\n4\n21090179\nBURGLARY AUTO\n02/08/2021 12:00:00 AM\n6:20\nBURGLARY - VEHICLE\n1\n06/15/2021 12:00:00 AM\n2700 BLOCK GARBER ST\\nBerkeley, CA\\n(37.86066,...\n2700 BLOCK GARBER ST\nBerkeley\nCA\n\n\n\n\n\n\n\nLooks like there are three columns with dates/times: EVENTDT, EVENTTM, and InDbDate.\nMost likely, EVENTDT stands for the date when the event took place, EVENTTM stands for the time of day the event took place (in 24-hr format), and InDbDate is the date this call is recorded onto the database.\nIf we check the data type of these columns, we will see they are stored as strings. 
We can convert them to datetime objects using pandas to_datetime function.\n\ncalls[\"EVENTDT\"] = pd.to_datetime(calls[\"EVENTDT\"])\ncalls.head()\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51690/874729699.py:1: UserWarning:\n\nCould not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n\n\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n0\n21014296\nTHEFT MISD. (UNDER $950)\n2021-04-01\n10:58\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n1\n21014391\nTHEFT MISD. (UNDER $950)\n2021-04-01\n10:38\nLARCENY\n4\n06/15/2021 12:00:00 AM\nBerkeley, CA\\n(37.869058, -122.270455)\nNaN\nBerkeley\nCA\n\n\n2\n21090494\nTHEFT MISD. (UNDER $950)\n2021-04-19\n12:15\nLARCENY\n1\n06/15/2021 12:00:00 AM\n2100 BLOCK HASTE ST\\nBerkeley, CA\\n(37.864908,...\n2100 BLOCK HASTE ST\nBerkeley\nCA\n\n\n3\n21090204\nTHEFT FELONY (OVER $950)\n2021-02-13\n17:00\nLARCENY\n6\n06/15/2021 12:00:00 AM\n2600 BLOCK WARRING ST\\nBerkeley, CA\\n(37.86393...\n2600 BLOCK WARRING ST\nBerkeley\nCA\n\n\n4\n21090179\nBURGLARY AUTO\n2021-02-08\n6:20\nBURGLARY - VEHICLE\n1\n06/15/2021 12:00:00 AM\n2700 BLOCK GARBER ST\\nBerkeley, CA\\n(37.86066,...\n2700 BLOCK GARBER ST\nBerkeley\nCA\n\n\n\n\n\n\n\nNow, we can use the dt accessor on this column.\nWe can get the month:\n\ncalls[\"EVENTDT\"].dt.month.head()\n\n0    4\n1    4\n2    4\n3    2\n4    2\nName: EVENTDT, dtype: int32\n\n\nWhich day of the week the date is on:\n\ncalls[\"EVENTDT\"].dt.dayofweek.head()\n\n0    3\n1    3\n2    0\n3    5\n4    0\nName: EVENTDT, dtype: int32\n\n\nCheck the mimimum values to see if there are any suspicious-looking, 70s 
dates:\n\ncalls.sort_values(\"EVENTDT\").head()\n\n\n\n\n\n\n\n\nCASENO\nOFFENSE\nEVENTDT\nEVENTTM\nCVLEGEND\nCVDOW\nInDbDate\nBlock_Location\nBLKADDR\nCity\nState\n\n\n\n\n2513\n20057398\nBURGLARY COMMERCIAL\n2020-12-17\n16:05\nBURGLARY - COMMERCIAL\n4\n06/15/2021 12:00:00 AM\n600 BLOCK GILMAN ST\\nBerkeley, CA\\n(37.878405,...\n600 BLOCK GILMAN ST\nBerkeley\nCA\n\n\n624\n20057207\nASSAULT/BATTERY MISD.\n2020-12-17\n16:50\nASSAULT\n4\n06/15/2021 12:00:00 AM\n2100 BLOCK SHATTUCK AVE\\nBerkeley, CA\\n(37.871...\n2100 BLOCK SHATTUCK AVE\nBerkeley\nCA\n\n\n154\n20092214\nTHEFT FROM AUTO\n2020-12-17\n18:30\nLARCENY - FROM VEHICLE\n4\n06/15/2021 12:00:00 AM\n800 BLOCK SHATTUCK AVE\\nBerkeley, CA\\n(37.8918...\n800 BLOCK SHATTUCK AVE\nBerkeley\nCA\n\n\n659\n20057324\nTHEFT MISD. (UNDER $950)\n2020-12-17\n15:44\nLARCENY\n4\n06/15/2021 12:00:00 AM\n1800 BLOCK 4TH ST\\nBerkeley, CA\\n(37.869888, -...\n1800 BLOCK 4TH ST\nBerkeley\nCA\n\n\n993\n20057573\nBURGLARY RESIDENTIAL\n2020-12-17\n22:15\nBURGLARY - RESIDENTIAL\n4\n06/15/2021 12:00:00 AM\n1700 BLOCK STUART ST\\nBerkeley, CA\\n(37.857495...\n1700 BLOCK STUART ST\nBerkeley\nCA\n\n\n\n\n\n\n\nDoesn’t look like it! We are good!\nWe can also do many things with the dt accessor like switching time zones and converting time back to UNIX/POSIX time. Check out the documentation on .dt accessor and time series/date functionality.",
     "crumbs": [
       "<span class='chapter-number'>5</span>  <span class='chapter-title'>Data Cleaning and EDA</span>"
     ]
@@ -284,7 +284,7 @@
     "href": "eda/eda.html#eda-demo-2-mauna-loa-co2-data-a-lesson-in-data-faithfulness",
     "title": "5  Data Cleaning and EDA",
     "section": "5.5 EDA Demo 2: Mauna Loa CO2 Data – A Lesson in Data Faithfulness",
-    "text": "5.5 EDA Demo 2: Mauna Loa CO2 Data – A Lesson in Data Faithfulness\nMauna Loa Observatory has been monitoring CO2 concentrations since 1958.\n\nco2_file = \"data/co2_mm_mlo.txt\"\n\nLet’s do some EDA!!\n\n5.5.1 Reading this file into Pandas?\nLet’s instead check out this .txt file. Some questions to keep in mind: Do we trust this file extension? What structure is it?\nLines 71-78 (inclusive) are shown below:\nline number |                            file contents\n\n71          |   #            decimal     average   interpolated    trend    #days\n72          |   #             date                             (season corr)\n73          |   1958   3    1958.208      315.71      315.71      314.62     -1\n74          |   1958   4    1958.292      317.45      317.45      315.29     -1\n75          |   1958   5    1958.375      317.50      317.50      314.71     -1\n76          |   1958   6    1958.458      -99.99      317.10      314.85     -1\n77          |   1958   7    1958.542      315.86      315.86      314.98     -1\n78          |   1958   8    1958.625      314.93      314.93      315.94     -1\nNotice how:\n\nThe values are separated by white space, possibly tabs.\nThe data line up down the rows. 
For example, the month appears in 7th to 8th position of each line.\nThe 71st and 72nd lines in the file contain column headings split over two lines.\n\nWe can use read_csv to read the data into a pandas DataFrame, and we provide several arguments to specify that the separators are white space, there is no header (we will set our own column names), and to skip the first 72 rows of the file.\n\nco2 = pd.read_csv(\n    co2_file, header = None, skiprows = 72,\n    sep = r'\\s+'       #delimiter for continuous whitespace (stay tuned for regex next lecture))\n)\nco2.head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nCongratulations! You’ve wrangled the data!\n\n…But our columns aren’t named. We need to do more EDA.\n\n\n5.5.2 Exploring Variable Feature Types\nThe NOAA webpage might have some useful tidbits (in this case it doesn’t).\nUsing this information, we’ll rerun pd.read_csv, but this time with some custom column names.\n\nco2 = pd.read_csv(\n    co2_file, header = None, skiprows = 72,\n    sep = '\\s+', #regex for continuous whitespace (next lecture)\n    names = ['Yr', 'Mo', 'DecDate', 'Avg', 'Int', 'Trend', 'Days']\n)\nco2.head()\n\n&lt;&gt;:3: SyntaxWarning:\n\ninvalid escape sequence '\\s'\n\n&lt;&gt;:3: SyntaxWarning:\n\ninvalid escape sequence '\\s'\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48380/150137587.py:3: SyntaxWarning:\n\ninvalid escape sequence 
'\\s'\n\n\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\n\n\n5.5.3 Visualizing CO2\nScientific studies tend to have very clean data, right…? Let’s jump right in and make a time series plot of CO2 monthly averages.\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2);\n\n\n\n\n\n\n\n\n\nThe code above uses the seaborn plotting library (abbreviated sns). We will cover this in the Visualization lecture, but now you don’t need to worry about how it works!\nYikes! Plotting the data uncovered a problem. The sharp vertical lines suggest that we have some missing values. What happened here?\n\nco2.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\n\nco2.tail()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n733\n2019\n4\n2019.29\n413.32\n413.32\n410.49\n26\n\n\n734\n2019\n5\n2019.38\n414.66\n414.66\n411.20\n28\n\n\n735\n2019\n6\n2019.46\n413.92\n413.92\n411.58\n27\n\n\n736\n2019\n7\n2019.54\n411.77\n411.77\n411.43\n23\n\n\n737\n2019\n8\n2019.62\n409.95\n409.95\n411.84\n29\n\n\n\n\n\n\n\nSome data have unusual values like -1 and -99.99.\nLet’s check the description at the top of the file again.\n\n-1 signifies a missing value for the number of days Days the equipment was in operation that month.\n-99.99 denotes a missing monthly average Avg\n\nHow can we fix this? First, let’s explore other aspects of our data. 
Understanding our data will help us decide what to do with the missing values.\n\n\n\n5.5.4 Sanity Checks: Reasoning about the data\nFirst, we consider the shape of the data. How many rows should we have?\n\nIf chronological order, we should have one record per month.\nData from March 1958 to August 2019.\nWe should have $ 12 (2019-1957) - 2 - 4 = 738 $ records.\n\n\nco2.shape\n\n(738, 7)\n\n\nNice!! The number of rows (i.e. records) match our expectations.\nLet’s now check the quality of each feature.\n\n\n5.5.5 Understanding Missing Value 1: Days\nDays is a time field, so let’s analyze other time fields to see if there is an explanation for missing values of days of operation.\nLet’s start with months, Mo.\nAre we missing any records? The number of months should have 62 or 61 instances (March 1957-August 2019).\n\nco2[\"Mo\"].value_counts().sort_index()\n\nMo\n1     61\n2     61\n3     62\n4     62\n5     62\n6     62\n7     62\n8     62\n9     61\n10    61\n11    61\n12    61\nName: count, dtype: int64\n\n\nAs expected Jan, Feb, Sep, Oct, Nov, and Dec have 61 occurrences and the rest 62.\n\nNext let’s explore days Days itself, which is the number of days that the measurement equipment worked.\n\n\nCode\nsns.displot(co2['Days']);\nplt.title(\"Distribution of days feature\"); # suppresses unneeded plotting output\n\n\n\n\n\n\n\n\n\nIn terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. 
In addition, there are nearly 200 missing values–that’s about 27% of the data!\n\nFinally, let’s check the last time feature, year Yr.\nLet’s check to see if there is any connection between missing-ness and the year of the recording.\n\n\nCode\nsns.scatterplot(x=\"Yr\", y=\"Days\", data=co2);\nplt.title(\"Day field by Year\"); # the ; suppresses output\n\n\n\n\n\n\n\n\n\nObservations:\n\nAll of the missing data are in the early years of operation.\nIt appears there may have been problems with equipment in the mid to late 80s.\n\nPotential Next Steps:\n\nConfirm these explanations through documentation about the historical readings.\nMaybe drop the earliest recordings? However, we would want to delay such action until after we have examined the time trends and assess whether there are any potential problems.\n\n\n\n\n5.5.6 Understanding Missing Value 2: Avg\nNext, let’s return to the -99.99 values in Avg to analyze the overall quality of the CO2 measurements. We’ll plot a histogram of the average CO2 measurements\n\n\nCode\n# Histograms of average CO2 measurements\nsns.displot(co2['Avg']);\n\n\n\n\n\n\n\n\n\nThe non-missing values are in the 300-400 range (a regular range of CO2 levels).\nWe also see that there are only a few missing Avg values (&lt;1% of values). 
Let’s examine all of them:\n\nco2[co2[\"Avg\"] &lt; 0]\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n7\n1958\n10\n1958.79\n-99.99\n312.66\n315.61\n-1\n\n\n71\n1964\n2\n1964.12\n-99.99\n320.07\n319.61\n-1\n\n\n72\n1964\n3\n1964.21\n-99.99\n320.73\n319.55\n-1\n\n\n73\n1964\n4\n1964.29\n-99.99\n321.77\n319.48\n-1\n\n\n213\n1975\n12\n1975.96\n-99.99\n330.59\n331.60\n0\n\n\n313\n1984\n4\n1984.29\n-99.99\n346.84\n344.27\n2\n\n\n\n\n\n\n\nThere doesn’t seem to be a pattern to these values, other than that most records also were missing Days data.\n\n\n5.5.7 Drop, NaN, or Impute Missing Avg Data?\nHow should we address the invalid Avg data?\n\nDrop records\nSet to NaN\nImpute using some strategy\n\nRemember we want to fix the following plot:\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2)\nplt.title(\"CO2 Average By Month\");\n\n\n\n\n\n\n\n\n\nSince we are plotting Avg vs DecDate, we should just focus on dealing with missing values for Avg.\nLet’s consider a few options: 1. Drop those records 2. Replace -99.99 with NaN 3. Substitute it with a likely value for the average CO2?\nWhat do you think are the pros and cons of each possible action?\nLet’s examine each of these three options.\n\n# 1. Drop missing values\nco2_drop = co2[co2['Avg'] &gt; 0]\nco2_drop.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n5\n1958\n8\n1958.62\n314.93\n314.93\n315.94\n-1\n\n\n\n\n\n\n\n\n# 2. 
Replace NaN with -99.99\nco2_NA = co2.replace(-99.99, np.nan)\nco2_NA.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\nNaN\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nWe’ll also use a third version of the data.\nFirst, we note that the dataset already comes with a substitute value for the -99.99.\nFrom the file description:\n\nThe interpolated column includes average values from the preceding column (average) and interpolated values where data are missing. Interpolated values are computed in two steps…\n\nThe Int feature has values that exactly match those in Avg, except when Avg is -99.99, and then a reasonable estimate is used instead.\nSo, the third version of our data will use the Int feature instead of Avg.\n\n# 3. Use interpolated column which estimates missing Avg values\nco2_impute = co2.copy()\nco2_impute['Avg'] = co2['Int']\nco2_impute.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n317.10\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nWhat’s a reasonable estimate?\nTo answer this question, let’s zoom in on a short time period, say the measurements in 1958 (where we know we have two missing values).\n\n\nCode\n# results of plotting data in 1958\n\ndef line_and_points(data, ax, title):\n    # assumes single year, hence Mo\n    ax.plot('Mo', 'Avg', data=data)\n    ax.scatter('Mo', 'Avg', data=data)\n    ax.set_xlim(2, 13)\n    ax.set_title(title)\n    ax.set_xticks(np.arange(3, 13))\n\ndef data_year(data, year):\n    return data[data[\"Yr\"] == 1958]\n    \n# uses matplotlib 
subplots\n# you may see more next week; focus on output for now\nfig, axes = plt.subplots(ncols = 3, figsize=(12, 4), sharey=True)\n\nyear = 1958\nline_and_points(data_year(co2_drop, year), axes[0], title=\"1. Drop Missing\")\nline_and_points(data_year(co2_NA, year), axes[1], title=\"2. Missing Set to NaN\")\nline_and_points(data_year(co2_impute, year), axes[2], title=\"3. Missing Interpolated\")\n\nfig.suptitle(f\"Monthly Averages for {year}\")\nplt.tight_layout()\n\n\n\n\n\n\n\n\n\nIn the big picture since there are only 7 Avg values missing (&lt;1% of 738 months), any of these approaches would work.\nHowever there is some appeal to option C, Imputing:\n\nShows seasonal trends for CO2\nWe are plotting all months in our data as a line plot\n\nLet’s replot our original figure with option 3:\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2_impute)\nplt.title(\"CO2 Average By Month, Imputed\");\n\n\n\n\n\n\n\n\n\nLooks pretty close to what we see on the NOAA website!\n\n\n5.5.8 Presenting the Data: A Discussion on Data Granularity\nFrom the description:\n\nMonthly measurements are averages of average day measurements.\nThe NOAA GML website has datasets for daily/hourly measurements too.\n\nThe data you present depends on your research question.\nHow do CO2 levels vary by season?\n\nYou might want to keep average monthly data.\n\nAre CO2 levels rising over the past 50+ years, consistent with global warming predictions?\n\nYou might be happier with a coarser granularity of average year data!\n\n\n\nCode\nco2_year = co2_impute.groupby('Yr').mean()\nsns.lineplot(x='Yr', y='Avg', data=co2_year)\nplt.title(\"CO2 Average By Year\");\n\n\n\n\n\n\n\n\n\nIndeed, we see a rise by nearly 100 ppm of CO2 since Mauna Loa began recording in 1958.",
+    "text": "5.5 EDA Demo 2: Mauna Loa CO2 Data – A Lesson in Data Faithfulness\nMauna Loa Observatory has been monitoring CO2 concentrations since 1958.\n\nco2_file = \"data/co2_mm_mlo.txt\"\n\nLet’s do some EDA!!\n\n5.5.1 Reading this file into Pandas?\nLet’s instead check out this .txt file. Some questions to keep in mind: Do we trust this file extension? What structure is it?\nLines 71-78 (inclusive) are shown below:\nline number |                            file contents\n\n71          |   #            decimal     average   interpolated    trend    #days\n72          |   #             date                             (season corr)\n73          |   1958   3    1958.208      315.71      315.71      314.62     -1\n74          |   1958   4    1958.292      317.45      317.45      315.29     -1\n75          |   1958   5    1958.375      317.50      317.50      314.71     -1\n76          |   1958   6    1958.458      -99.99      317.10      314.85     -1\n77          |   1958   7    1958.542      315.86      315.86      314.98     -1\n78          |   1958   8    1958.625      314.93      314.93      315.94     -1\nNotice how:\n\nThe values are separated by white space, possibly tabs.\nThe data line up down the rows. 
For example, the month appears in 7th to 8th position of each line.\nThe 71st and 72nd lines in the file contain column headings split over two lines.\n\nWe can use read_csv to read the data into a pandas DataFrame, and we provide several arguments to specify that the separators are white space, there is no header (we will set our own column names), and to skip the first 72 rows of the file.\n\nco2 = pd.read_csv(\n    co2_file, header = None, skiprows = 72,\n    sep = r'\\s+'       #delimiter for continuous whitespace (stay tuned for regex next lecture))\n)\nco2.head()\n\n\n\n\n\n\n\n\n0\n1\n2\n3\n4\n5\n6\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nCongratulations! You’ve wrangled the data!\n\n…But our columns aren’t named. We need to do more EDA.\n\n\n5.5.2 Exploring Variable Feature Types\nThe NOAA webpage might have some useful tidbits (in this case it doesn’t).\nUsing this information, we’ll rerun pd.read_csv, but this time with some custom column names.\n\nco2 = pd.read_csv(\n    co2_file, header = None, skiprows = 72,\n    sep = '\\s+', #regex for continuous whitespace (next lecture)\n    names = ['Yr', 'Mo', 'DecDate', 'Avg', 'Int', 'Trend', 'Days']\n)\nco2.head()\n\n&lt;&gt;:3: SyntaxWarning:\n\ninvalid escape sequence '\\s'\n\n&lt;&gt;:3: SyntaxWarning:\n\ninvalid escape sequence '\\s'\n\n/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51690/150137587.py:3: SyntaxWarning:\n\ninvalid escape sequence 
'\\s'\n\n\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\n\n\n5.5.3 Visualizing CO2\nScientific studies tend to have very clean data, right…? Let’s jump right in and make a time series plot of CO2 monthly averages.\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2);\n\n\n\n\n\n\n\n\n\nThe code above uses the seaborn plotting library (abbreviated sns). We will cover this in the Visualization lecture, but now you don’t need to worry about how it works!\nYikes! Plotting the data uncovered a problem. The sharp vertical lines suggest that we have some missing values. What happened here?\n\nco2.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\n\nco2.tail()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n733\n2019\n4\n2019.29\n413.32\n413.32\n410.49\n26\n\n\n734\n2019\n5\n2019.38\n414.66\n414.66\n411.20\n28\n\n\n735\n2019\n6\n2019.46\n413.92\n413.92\n411.58\n27\n\n\n736\n2019\n7\n2019.54\n411.77\n411.77\n411.43\n23\n\n\n737\n2019\n8\n2019.62\n409.95\n409.95\n411.84\n29\n\n\n\n\n\n\n\nSome data have unusual values like -1 and -99.99.\nLet’s check the description at the top of the file again.\n\n-1 signifies a missing value for the number of days Days the equipment was in operation that month.\n-99.99 denotes a missing monthly average Avg\n\nHow can we fix this? First, let’s explore other aspects of our data. 
Understanding our data will help us decide what to do with the missing values.\n\n\n\n5.5.4 Sanity Checks: Reasoning about the data\nFirst, we consider the shape of the data. How many rows should we have?\n\nIf chronological order, we should have one record per month.\nData from March 1958 to August 2019.\nWe should have $ 12 (2019-1957) - 2 - 4 = 738 $ records.\n\n\nco2.shape\n\n(738, 7)\n\n\nNice!! The number of rows (i.e. records) matches our expectations.\nLet’s now check the quality of each feature.\n\n\n5.5.5 Understanding Missing Value 1: Days\nDays is a time field, so let’s analyze other time fields to see if there is an explanation for missing values of days of operation.\nLet’s start with months, Mo.\nAre we missing any records? Each month should have 62 or 61 instances (March 1958-August 2019).\n\nco2[\"Mo\"].value_counts().sort_index()\n\nMo\n1     61\n2     61\n3     62\n4     62\n5     62\n6     62\n7     62\n8     62\n9     61\n10    61\n11    61\n12    61\nName: count, dtype: int64\n\n\nAs expected, Jan, Feb, Sep, Oct, Nov, and Dec have 61 occurrences and the rest 62.\n\nNext let’s explore days Days itself, which is the number of days that the measurement equipment worked.\n\n\nCode\nsns.displot(co2['Days']);\nplt.title(\"Distribution of days feature\"); # suppresses unneeded plotting output\n\n\n\n\n\n\n\n\n\nIn terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. 
In addition, there are nearly 200 missing values–that’s about 27% of the data!\n\nFinally, let’s check the last time feature, year Yr.\nLet’s check to see if there is any connection between missing-ness and the year of the recording.\n\n\nCode\nsns.scatterplot(x=\"Yr\", y=\"Days\", data=co2);\nplt.title(\"Day field by Year\"); # the ; suppresses output\n\n\n\n\n\n\n\n\n\nObservations:\n\nAll of the missing data are in the early years of operation.\nIt appears there may have been problems with equipment in the mid to late 80s.\n\nPotential Next Steps:\n\nConfirm these explanations through documentation about the historical readings.\nMaybe drop the earliest recordings? However, we would want to delay such action until after we have examined the time trends and assess whether there are any potential problems.\n\n\n\n\n5.5.6 Understanding Missing Value 2: Avg\nNext, let’s return to the -99.99 values in Avg to analyze the overall quality of the CO2 measurements. We’ll plot a histogram of the average CO2 measurements\n\n\nCode\n# Histograms of average CO2 measurements\nsns.displot(co2['Avg']);\n\n\n\n\n\n\n\n\n\nThe non-missing values are in the 300-400 range (a regular range of CO2 levels).\nWe also see that there are only a few missing Avg values (&lt;1% of values). 
Let’s examine all of them:\n\nco2[co2[\"Avg\"] &lt; 0]\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n3\n1958\n6\n1958.46\n-99.99\n317.10\n314.85\n-1\n\n\n7\n1958\n10\n1958.79\n-99.99\n312.66\n315.61\n-1\n\n\n71\n1964\n2\n1964.12\n-99.99\n320.07\n319.61\n-1\n\n\n72\n1964\n3\n1964.21\n-99.99\n320.73\n319.55\n-1\n\n\n73\n1964\n4\n1964.29\n-99.99\n321.77\n319.48\n-1\n\n\n213\n1975\n12\n1975.96\n-99.99\n330.59\n331.60\n0\n\n\n313\n1984\n4\n1984.29\n-99.99\n346.84\n344.27\n2\n\n\n\n\n\n\n\nThere doesn’t seem to be a pattern to these values, other than that most records also were missing Days data.\n\n\n5.5.7 Drop, NaN, or Impute Missing Avg Data?\nHow should we address the invalid Avg data?\n\nDrop records\nSet to NaN\nImpute using some strategy\n\nRemember we want to fix the following plot:\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2)\nplt.title(\"CO2 Average By Month\");\n\n\n\n\n\n\n\n\n\nSince we are plotting Avg vs DecDate, we should just focus on dealing with missing values for Avg.\nLet’s consider a few options: 1. Drop those records 2. Replace -99.99 with NaN 3. Substitute it with a likely value for the average CO2?\nWhat do you think are the pros and cons of each possible action?\nLet’s examine each of these three options.\n\n# 1. Drop missing values\nco2_drop = co2[co2['Avg'] &gt; 0]\nco2_drop.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n5\n1958\n8\n1958.62\n314.93\n314.93\n315.94\n-1\n\n\n\n\n\n\n\n\n# 2. 
Replace -99.99 with NaN\nco2_NA = co2.replace(-99.99, np.nan)\nco2_NA.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\nNaN\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nWe’ll also use a third version of the data.\nFirst, we note that the dataset already comes with a substitute value for the -99.99.\nFrom the file description:\n\nThe interpolated column includes average values from the preceding column (average) and interpolated values where data are missing. Interpolated values are computed in two steps…\n\nThe Int feature has values that exactly match those in Avg, except when Avg is -99.99, and then a reasonable estimate is used instead.\nSo, the third version of our data will use the Int feature instead of Avg.\n\n# 3. Use interpolated column which estimates missing Avg values\nco2_impute = co2.copy()\nco2_impute['Avg'] = co2['Int']\nco2_impute.head()\n\n\n\n\n\n\n\n\nYr\nMo\nDecDate\nAvg\nInt\nTrend\nDays\n\n\n\n\n0\n1958\n3\n1958.21\n315.71\n315.71\n314.62\n-1\n\n\n1\n1958\n4\n1958.29\n317.45\n317.45\n315.29\n-1\n\n\n2\n1958\n5\n1958.38\n317.50\n317.50\n314.71\n-1\n\n\n3\n1958\n6\n1958.46\n317.10\n317.10\n314.85\n-1\n\n\n4\n1958\n7\n1958.54\n315.86\n315.86\n314.98\n-1\n\n\n\n\n\n\n\nWhat’s a reasonable estimate?\nTo answer this question, let’s zoom in on a short time period, say the measurements in 1958 (where we know we have two missing values).\n\n\nCode\n# results of plotting data in 1958\n\ndef line_and_points(data, ax, title):\n    # assumes single year, hence Mo\n    ax.plot('Mo', 'Avg', data=data)\n    ax.scatter('Mo', 'Avg', data=data)\n    ax.set_xlim(2, 13)\n    ax.set_title(title)\n    ax.set_xticks(np.arange(3, 13))\n\ndef data_year(data, year):\n    return data[data[\"Yr\"] == 1958]\n    \n# uses matplotlib 
subplots\n# you may see more next week; focus on output for now\nfig, axes = plt.subplots(ncols = 3, figsize=(12, 4), sharey=True)\n\nyear = 1958\nline_and_points(data_year(co2_drop, year), axes[0], title=\"1. Drop Missing\")\nline_and_points(data_year(co2_NA, year), axes[1], title=\"2. Missing Set to NaN\")\nline_and_points(data_year(co2_impute, year), axes[2], title=\"3. Missing Interpolated\")\n\nfig.suptitle(f\"Monthly Averages for {year}\")\nplt.tight_layout()\n\n\n\n\n\n\n\n\n\nIn the big picture, since there are only 7 Avg values missing (&lt;1% of 738 months), any of these approaches would work.\nHowever, there is some appeal to option 3, Imputing:\n\nShows seasonal trends for CO2\nWe are plotting all months in our data as a line plot\n\nLet’s replot our original figure with option 3:\n\n\nCode\nsns.lineplot(x='DecDate', y='Avg', data=co2_impute)\nplt.title(\"CO2 Average By Month, Imputed\");\n\n\n\n\n\n\n\n\n\nLooks pretty close to what we see on the NOAA website!\n\n\n5.5.8 Presenting the Data: A Discussion on Data Granularity\nFrom the description:\n\nMonthly measurements are averages of average day measurements.\nThe NOAA GML website has datasets for daily/hourly measurements too.\n\nThe data you present depends on your research question.\nHow do CO2 levels vary by season?\n\nYou might want to keep average monthly data.\n\nAre CO2 levels rising over the past 50+ years, consistent with global warming predictions?\n\nYou might be happier with a coarser granularity of average year data!\n\n\n\nCode\nco2_year = co2_impute.groupby('Yr').mean()\nsns.lineplot(x='Yr', y='Avg', data=co2_year)\nplt.title(\"CO2 Average By Year\");\n\n\n\n\n\n\n\n\n\nIndeed, we see a rise by nearly 100 ppm of CO2 since Mauna Loa began recording in 1958.",
     "crumbs": [
       "<span class='chapter-number'>5</span>  <span class='chapter-title'>Data Cleaning and EDA</span>"
     ]
@@ -544,7 +544,7 @@
     "href": "sampling/sampling.html#probability-samples",
     "title": "9  Sampling",
     "section": "9.3 Probability Samples",
-    "text": "9.3 Probability Samples\nWhen sampling, it is essential to focus on the quality of the sample rather than the quantity of the sample. A huge sample size does not fix a bad sampling method. Our main goal is to gather a sample that is representative of the population it came from. In this section, we’ll explore the different types of sampling and their pros and cons.\nA convenience sample is whatever you can get ahold of; this type of sampling is non-random. Note that haphazard sampling is not necessarily random sampling; there are many potential sources of bias.\nIn a probability sample, we provide the chance that any specified set of individuals will be in the sample (individuals in the population can have different chances of being selected; they don’t all have to be uniform), and we sample at random based off this known chance. For this reason, probability samples are also called random samples. The randomness provides a few benefits:\n\nBecause we know the source probabilities, we can measure the errors.\nSampling at random gives us a more representative sample of the population, which reduces bias. (Note: this is only the case when the probability distribution we’re sampling from is accurate. Random samples using “bad” or inaccurate distributions can produce biased estimates of population quantities.)\nProbability samples allow us to estimate the bias and chance error, which helps us quantify uncertainty (more in a future lecture).\n\nThe real world is usually more complicated, and we often don’t know the initial probabilities. For example, we do not generally know the probability that a given bacterium is in a microbiome sample or whether people will answer when Gallup calls landlines. 
That being said, still we try to model probability sampling to the best of our ability even when the sampling or measurement process is not fully under our control.\nA few common random sampling schemes:\n\nA uniform random sample with replacement is a sample drawn uniformly at random with replacement.\n\nRandom doesn’t always mean “uniformly at random,” but in this specific context, it does.\nSome individuals in the population might get picked more than once.\n\nA simple random sample (SRS) is a sample drawn uniformly at random without replacement.\n\nEvery individual (and subset of individuals) has the same chance of being selected from the sampling frame.\nEvery pair has the same chance as every other pair.\nEvery triple has the same chance as every other triple.\nAnd so on.\n\nA stratified random sample, where random sampling is performed on strata (specific groups), and the groups together compose a sample.\n\n\n9.3.1 Example Scheme 1: Probability Sample\nSuppose we have 3 TA’s (Arman, Boyu, Charlie): I decide to sample 2 of them as follows:\n\nI choose A with probability 1.0\nI choose either B or C, each with a probability of 0.5.\n\nWe can list all the possible outcomes and their respective probabilities in a table:\n\n\n\nOutcome\nProbability\n\n\n\n\n{A, B}\n0.5\n\n\n{A, C}\n0.5\n\n\n{B, C}\n0\n\n\n\nThis is a probability sample (though not a great one). Of the 3 people in my population, I know the chance of getting each subset. Suppose I’m measuring the average distance TAs live from campus.\n\nThis scheme does not see the entire population!\nMy estimate using the single sample I take has some chance error depending on if I see AB or AC.\nThis scheme is biased towards A’s response.\n\n\n\n9.3.2 Example Scheme 2: Simple Random Sample\nConsider the following sampling scheme:\n\nA class roster has 1100 students listed alphabetically.\nPick one of the first 10 students on the list at random (e.g. 
Student 8).\nTo create your sample, take that student and every 10th student listed after that (e.g. Students 8, 18, 28, 38, etc.).\n\n\n\nIs this a probability sample?\n\nYes. For a sample [n, n + 10, n + 20, …, n + 1090], where 1 &lt;= n &lt;= 10, the probability of that sample is 1/10. Otherwise, the probability is 0.\nOnly 10 possible samples!\n\n\n\nDoes each student have the same probability of being selected?\n\nYes. Each student is chosen with a probability of 1/10.\n\n\n\nIs this a simple random sample?\n\nNo. The chance of selecting (8, 18) is 1/10; the chance of selecting (8, 9) is 0.\n\n\n\n9.3.3 Demo: Barbie v. Oppenheimer\nWe are trying to collect a sample from Berkeley residents to predict the which one of Barbie and Oppenheimer would perform better on their opening day, July 21st.\nFirst, let’s grab a dataset that has every single resident in Berkeley (this is a fake dataset) and which movie they actually watched on July 21st.\nLet’s load in the movie.csv table. We can assume that:\n\nis_male is a boolean that indicates if a resident identifies as male.\nThere are only two movies they can watch on July 21st: Barbie and Oppenheimer.\nEvery resident watches a movie (either Barbie or Oppenheimer) on July 21st.\n\n\n\nCode\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\nsns.set_theme(style='darkgrid', font_scale = 1.5,\n              rc={'figure.figsize':(7,5)})\n\nrng = np.random.default_rng()\n\n\n\nmovie = pd.read_csv(\"data/movie.csv\")\n\n# create a 1/0 int that indicates Barbie vote\nmovie['barbie'] = (movie['movie'] == 'Barbie').astype(int)\nmovie.head()\n\n\n\n\n\n\n\n\nage\nis_male\nmovie\nbarbie\n\n\n\n\n0\n35\nFalse\nBarbie\n1\n\n\n1\n42\nTrue\nOppenheimer\n0\n\n\n2\n55\nFalse\nBarbie\n1\n\n\n3\n77\nTrue\nOppenheimer\n0\n\n\n4\n31\nFalse\nBarbie\n1\n\n\n\n\n\n\n\nWhat fraction of Berkeley residents chose Barbie?\n\nactual_barbie = 
np.mean(movie[\"barbie\"])\nactual_barbie\n\nnp.float64(0.5302792307692308)\n\n\nThis is the actual outcome of the competition. Based on this result, Barbie would win. How did our sample of retirees do?\n\n9.3.3.1 Convenience Sample: Retirees\nLet’s take a convenience sample of people who have retired (&gt;= 65 years old). What proportion of them went to see Barbie instead of Oppenheimer?\n\nconvenience_sample = movie[movie['age'] &gt;= 65] # take a convenience sample of retirees\nnp.mean(convenience_sample[\"barbie\"]) # what proportion of them saw Barbie? \n\nnp.float64(0.3744755089093924)\n\n\nBased on this result, we would have predicted that Oppenheimer would win! What happened? Is it possible that our sample is too small or noisy?\n\n# what's the size of our sample? \nlen(convenience_sample)\n\n359396\n\n\n\n# what proportion of our data is in the convenience sample? \nlen(convenience_sample)/len(movie)\n\n0.27645846153846154\n\n\nSeems like our sample is rather large (roughly 360,000 people), so the error is likely not due to solely to chance.\n\n\n9.3.3.2 Check for Bias\nLet us aggregate all choices by age and visualize the fraction of Barbie views, split by gender.\n\nvotes_by_barbie = movie.groupby([\"age\",\"is_male\"]).agg(\"mean\", numeric_only=True).reset_index()\nvotes_by_barbie.head()\n\n\n\n\n\n\n\n\nage\nis_male\nbarbie\n\n\n\n\n0\n18\nFalse\n0.819594\n\n\n1\n18\nTrue\n0.667001\n\n\n2\n19\nFalse\n0.812214\n\n\n3\n19\nTrue\n0.661252\n\n\n4\n20\nFalse\n0.805281\n\n\n\n\n\n\n\n\n\nCode\n# A common matplotlib/seaborn pattern: create the figure and axes object, pass ax\n# to seaborn for drawing into, and later fine-tune the figure via ax.\nfig, ax = plt.subplots();\n\nred_blue = [\"#bf1518\", \"#397eb7\"]\nwith sns.color_palette(red_blue):\n    sns.pointplot(data=votes_by_barbie, x = \"age\", y = \"barbie\", hue = \"is_male\", ax=ax)\n\nnew_ticks = [i.get_text() for i in ax.get_xticklabels()]\nax.set_xticks(range(0, len(new_ticks), 10), 
new_ticks[::10])\nax.set_title(\"Preferences by Demographics\");\n\n\n\n\n\n\n\n\n\n\nWe see that retirees (in Berkeley) tend to watch Oppenheimer.\nWe also see that residents who identify as non-male tend to prefer Barbie.\n\n\n\n9.3.3.3 Simple Random Sample\nSuppose we took a simple random sample (SRS) of the same size as our retiree sample:\n\nn = len(convenience_sample)\nrandom_sample = movie.sample(n, replace = False) ## By default, replace = False\nnp.mean(random_sample[\"barbie\"])\n\nnp.float64(0.5279914078064308)\n\n\nThis is very close to the actual vote of 0.5302792307692308!\nIt turns out that we can get similar results with a much smaller sample size, say, 800:\n\nn = 800\nrandom_sample = movie.sample(n, replace = False)\n\n# Compute the sample average and the resulting relative error\nsample_barbie = np.mean(random_sample[\"barbie\"])\nerr = abs(sample_barbie-actual_barbie)/actual_barbie\n\n# We can print output with Markdown formatting too...\nfrom IPython.display import Markdown\nMarkdown(f\"**Actual** = {actual_barbie:.4f}, **Sample** = {sample_barbie:.4f}, \"\n         f\"**Err** = {100*err:.2f}%.\")\n\nActual = 0.5303, Sample = 0.5387, Err = 1.60%.\n\n\nWe’ll learn how to choose this number when we (re)learn the Central Limit Theorem later in the semester.\n\n\n9.3.3.4 Quantifying Chance Error\nIn our SRS of size 800, what would be our chance error?\nLet’s simulate 1000 versions of taking the 800-sized SRS from before:\n\nnrep = 1000   # number of simulations\nn = 800       # size of our sample\npoll_result = []\nfor i in range(0, nrep):\n    random_sample = movie.sample(n, replace = False)\n    poll_result.append(np.mean(random_sample[\"barbie\"]))\n\n\n\nCode\nfig, ax = plt.subplots()\nsns.histplot(poll_result, stat='density', ax=ax)\nax.axvline(actual_barbie, color=\"orange\", lw=4);\n\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/seaborn/_oldcore.py:1119: FutureWarning:\n\nuse_inf_as_na option is deprecated and will 
be removed in a future version. Convert inf values to NaN before operating instead.\n\n\n\n\n\n\n\n\n\n\nWhat fraction of these simulated samples would have predicted Barbie?\n\npoll_result = pd.Series(poll_result)\nnp.sum(poll_result &gt; 0.5)/1000\n\nnp.float64(0.956)\n\n\nYou can see the curve looks roughly Gaussian/normal. Using KDE:\n\n\nCode\nsns.histplot(poll_result, stat='density', kde=True);\n\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/seaborn/_oldcore.py:1119: FutureWarning:\n\nuse_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.",
+    "text": "9.3 Probability Samples\nWhen sampling, it is essential to focus on the quality of the sample rather than the quantity of the sample. A huge sample size does not fix a bad sampling method. Our main goal is to gather a sample that is representative of the population it came from. In this section, we’ll explore the different types of sampling and their pros and cons.\nA convenience sample is whatever you can get ahold of; this type of sampling is non-random. Note that haphazard sampling is not necessarily random sampling; there are many potential sources of bias.\nIn a probability sample, we provide the chance that any specified set of individuals will be in the sample (individuals in the population can have different chances of being selected; they don’t all have to be uniform), and we sample at random based off this known chance. For this reason, probability samples are also called random samples. The randomness provides a few benefits:\n\nBecause we know the source probabilities, we can measure the errors.\nSampling at random gives us a more representative sample of the population, which reduces bias. (Note: this is only the case when the probability distribution we’re sampling from is accurate. Random samples using “bad” or inaccurate distributions can produce biased estimates of population quantities.)\nProbability samples allow us to estimate the bias and chance error, which helps us quantify uncertainty (more in a future lecture).\n\nThe real world is usually more complicated, and we often don’t know the initial probabilities. For example, we do not generally know the probability that a given bacterium is in a microbiome sample or whether people will answer when Gallup calls landlines. 
That being said, still we try to model probability sampling to the best of our ability even when the sampling or measurement process is not fully under our control.\nA few common random sampling schemes:\n\nA uniform random sample with replacement is a sample drawn uniformly at random with replacement.\n\nRandom doesn’t always mean “uniformly at random,” but in this specific context, it does.\nSome individuals in the population might get picked more than once.\n\nA simple random sample (SRS) is a sample drawn uniformly at random without replacement.\n\nEvery individual (and subset of individuals) has the same chance of being selected from the sampling frame.\nEvery pair has the same chance as every other pair.\nEvery triple has the same chance as every other triple.\nAnd so on.\n\nA stratified random sample, where random sampling is performed on strata (specific groups), and the groups together compose a sample.\n\n\n9.3.1 Example Scheme 1: Probability Sample\nSuppose we have 3 TA’s (Arman, Boyu, Charlie): I decide to sample 2 of them as follows:\n\nI choose A with probability 1.0\nI choose either B or C, each with a probability of 0.5.\n\nWe can list all the possible outcomes and their respective probabilities in a table:\n\n\n\nOutcome\nProbability\n\n\n\n\n{A, B}\n0.5\n\n\n{A, C}\n0.5\n\n\n{B, C}\n0\n\n\n\nThis is a probability sample (though not a great one). Of the 3 people in my population, I know the chance of getting each subset. Suppose I’m measuring the average distance TAs live from campus.\n\nThis scheme does not see the entire population!\nMy estimate using the single sample I take has some chance error depending on if I see AB or AC.\nThis scheme is biased towards A’s response.\n\n\n\n9.3.2 Example Scheme 2: Simple Random Sample\nConsider the following sampling scheme:\n\nA class roster has 1100 students listed alphabetically.\nPick one of the first 10 students on the list at random (e.g. 
Student 8).\nTo create your sample, take that student and every 10th student listed after that (e.g. Students 8, 18, 28, 38, etc.).\n\n\n\nIs this a probability sample?\n\nYes. For a sample [n, n + 10, n + 20, …, n + 1090], where 1 &lt;= n &lt;= 10, the probability of that sample is 1/10. Otherwise, the probability is 0.\nOnly 10 possible samples!\n\n\n\nDoes each student have the same probability of being selected?\n\nYes. Each student is chosen with a probability of 1/10.\n\n\n\nIs this a simple random sample?\n\nNo. The chance of selecting (8, 18) is 1/10; the chance of selecting (8, 9) is 0.\n\n\n\n9.3.3 Demo: Barbie v. Oppenheimer\nWe are trying to collect a sample from Berkeley residents to predict the which one of Barbie and Oppenheimer would perform better on their opening day, July 21st.\nFirst, let’s grab a dataset that has every single resident in Berkeley (this is a fake dataset) and which movie they actually watched on July 21st.\nLet’s load in the movie.csv table. We can assume that:\n\nis_male is a boolean that indicates if a resident identifies as male.\nThere are only two movies they can watch on July 21st: Barbie and Oppenheimer.\nEvery resident watches a movie (either Barbie or Oppenheimer) on July 21st.\n\n\n\nCode\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\nsns.set_theme(style='darkgrid', font_scale = 1.5,\n              rc={'figure.figsize':(7,5)})\n\nrng = np.random.default_rng()\n\n\n\nmovie = pd.read_csv(\"data/movie.csv\")\n\n# create a 1/0 int that indicates Barbie vote\nmovie['barbie'] = (movie['movie'] == 'Barbie').astype(int)\nmovie.head()\n\n\n\n\n\n\n\n\nage\nis_male\nmovie\nbarbie\n\n\n\n\n0\n35\nFalse\nBarbie\n1\n\n\n1\n42\nTrue\nOppenheimer\n0\n\n\n2\n55\nFalse\nBarbie\n1\n\n\n3\n77\nTrue\nOppenheimer\n0\n\n\n4\n31\nFalse\nBarbie\n1\n\n\n\n\n\n\n\nWhat fraction of Berkeley residents chose Barbie?\n\nactual_barbie = 
np.mean(movie[\"barbie\"])\nactual_barbie\n\nnp.float64(0.5302792307692308)\n\n\nThis is the actual outcome of the competition. Based on this result, Barbie would win. How did our sample of retirees do?\n\n9.3.3.1 Convenience Sample: Retirees\nLet’s take a convenience sample of people who have retired (&gt;= 65 years old). What proportion of them went to see Barbie instead of Oppenheimer?\n\nconvenience_sample = movie[movie['age'] &gt;= 65] # take a convenience sample of retirees\nnp.mean(convenience_sample[\"barbie\"]) # what proportion of them saw Barbie? \n\nnp.float64(0.3744755089093924)\n\n\nBased on this result, we would have predicted that Oppenheimer would win! What happened? Is it possible that our sample is too small or noisy?\n\n# what's the size of our sample? \nlen(convenience_sample)\n\n359396\n\n\n\n# what proportion of our data is in the convenience sample? \nlen(convenience_sample)/len(movie)\n\n0.27645846153846154\n\n\nSeems like our sample is rather large (roughly 360,000 people), so the error is likely not due to solely to chance.\n\n\n9.3.3.2 Check for Bias\nLet us aggregate all choices by age and visualize the fraction of Barbie views, split by gender.\n\nvotes_by_barbie = movie.groupby([\"age\",\"is_male\"]).agg(\"mean\", numeric_only=True).reset_index()\nvotes_by_barbie.head()\n\n\n\n\n\n\n\n\nage\nis_male\nbarbie\n\n\n\n\n0\n18\nFalse\n0.819594\n\n\n1\n18\nTrue\n0.667001\n\n\n2\n19\nFalse\n0.812214\n\n\n3\n19\nTrue\n0.661252\n\n\n4\n20\nFalse\n0.805281\n\n\n\n\n\n\n\n\n\nCode\n# A common matplotlib/seaborn pattern: create the figure and axes object, pass ax\n# to seaborn for drawing into, and later fine-tune the figure via ax.\nfig, ax = plt.subplots();\n\nred_blue = [\"#bf1518\", \"#397eb7\"]\nwith sns.color_palette(red_blue):\n    sns.pointplot(data=votes_by_barbie, x = \"age\", y = \"barbie\", hue = \"is_male\", ax=ax)\n\nnew_ticks = [i.get_text() for i in ax.get_xticklabels()]\nax.set_xticks(range(0, len(new_ticks), 10), 
new_ticks[::10])\nax.set_title(\"Preferences by Demographics\");\n\n\n\n\n\n\n\n\n\n\nWe see that retirees (in Berkeley) tend to watch Oppenheimer.\nWe also see that residents who identify as non-male tend to prefer Barbie.\n\n\n\n9.3.3.3 Simple Random Sample\nSuppose we took a simple random sample (SRS) of the same size as our retiree sample:\n\nn = len(convenience_sample)\nrandom_sample = movie.sample(n, replace = False) ## By default, replace = False\nnp.mean(random_sample[\"barbie\"])\n\nnp.float64(0.5317171031397122)\n\n\nThis is very close to the actual vote of 0.5302792307692308!\nIt turns out that we can get similar results with a much smaller sample size, say, 800:\n\nn = 800\nrandom_sample = movie.sample(n, replace = False)\n\n# Compute the sample average and the resulting relative error\nsample_barbie = np.mean(random_sample[\"barbie\"])\nerr = abs(sample_barbie-actual_barbie)/actual_barbie\n\n# We can print output with Markdown formatting too...\nfrom IPython.display import Markdown\nMarkdown(f\"**Actual** = {actual_barbie:.4f}, **Sample** = {sample_barbie:.4f}, \"\n         f\"**Err** = {100*err:.2f}%.\")\n\nActual = 0.5303, Sample = 0.5012, Err = 5.47%.\n\n\nWe’ll learn how to choose this number when we (re)learn the Central Limit Theorem later in the semester.\n\n\n9.3.3.4 Quantifying Chance Error\nIn our SRS of size 800, what would be our chance error?\nLet’s simulate 1000 versions of taking the 800-sized SRS from before:\n\nnrep = 1000   # number of simulations\nn = 800       # size of our sample\npoll_result = []\nfor i in range(0, nrep):\n    random_sample = movie.sample(n, replace = False)\n    poll_result.append(np.mean(random_sample[\"barbie\"]))\n\n\n\nCode\nfig, ax = plt.subplots()\nsns.histplot(poll_result, stat='density', ax=ax)\nax.axvline(actual_barbie, color=\"orange\", lw=4);\n\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/seaborn/_oldcore.py:1119: FutureWarning:\n\nuse_inf_as_na option is deprecated and will 
be removed in a future version. Convert inf values to NaN before operating instead.\n\n\n\n\n\n\n\n\n\n\nWhat fraction of these simulated samples would have predicted Barbie?\n\npoll_result = pd.Series(poll_result)\nnp.sum(poll_result &gt; 0.5)/1000\n\nnp.float64(0.961)\n\n\nYou can see the curve looks roughly Gaussian/normal. Using KDE:\n\n\nCode\nsns.histplot(poll_result, stat='density', kde=True);\n\n\n/Users/nikhilreddy/course-notes/ds100env/lib/python3.12/site-packages/seaborn/_oldcore.py:1119: FutureWarning:\n\nuse_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.",
     "crumbs": [
       "<span class='chapter-number'>9</span>  <span class='chapter-title'>Sampling</span>"
     ]
@@ -559,86 +559,6 @@
       "<span class='chapter-number'>9</span>  <span class='chapter-title'>Sampling</span>"
     ]
   },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html",
-    "href": "intro_to_modeling/intro_to_modeling.html",
-    "title": "10  Introduction to Modeling",
-    "section": "",
-    "text": "10.1 What is a Model?\nA model is an idealized representation of a system. A system is a set of principles or procedures according to which something functions. We live in a world full of systems: the procedure of turning on a light happens according to a specific set of rules dictating the flow of electricity. The truth behind how any event occurs is usually complex, and many times the specifics are unknown. The workings of the world can be viewed as its own giant procedure. Models seek to simplify the world and distill them into workable pieces.\nExample: We model the fall of an object on Earth as subject to a constant acceleration of \\(9.81 m/s^2\\) due to gravity.",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#what-is-a-model",
-    "href": "intro_to_modeling/intro_to_modeling.html#what-is-a-model",
-    "title": "10  Introduction to Modeling",
-    "section": "",
-    "text": "While this describes the behavior of our system, it is merely an approximation.\nIt doesn’t account for the effects of air resistance, local variations in gravity, etc.\nIn practice, it’s accurate enough to be useful!\n\n\n10.1.1 Reasons for Building Models\nWhy do we want to build models? As far as data scientists and statisticians are concerned, there are three reasons, and each implies a different focus on modeling.\n\nTo explain complex phenomena occurring in the world we live in. Examples of this might be:\n\nHow are the parents’ average height related to their children’s average height?\nHow does an object’s velocity and acceleration impact how far it travels? (Physics: \\(d = d_0 + vt + \\frac{1}{2}at^2\\))\n\nIn these cases, we care about creating models that are simple and interpretable, allowing us to understand what the relationships between our variables are.\nTo make accurate predictions about unseen data. Some examples include:\n\nCan we predict if an email is spam or not?\nCan we generate a one-sentence summary of this 10-page long article?\n\nWhen making predictions, we care more about making extremely accurate predictions, at the cost of having an uninterpretable model. These are sometimes called black-box models and are common in fields like deep learning.\nTo measure the causal effects of one event on some other event. For example,\n\nDoes smoking cause lung cancer?\nDoes a job training program cause increases in employment and wages?\n\nThis is a much harder question because most statistical tools are designed to infer association, not causation. 
We will not focus on this task in Data 100, but you can take other advanced classes on causal inference (e.g., Stat 156, Data 102) if you are intrigued!\n\nMost of the time, we aim to strike a balance between building interpretable models and building accurate models.\n\n\n10.1.2 Common Types of Models\nIn general, models can be split into two categories:\n\nDeterministic physical (mechanistic) models: Laws that govern how the world works.\n\nKepler’s Third Law of Planetary Motion (1619): The ratio of the square of an object’s orbital period with the cube of the semi-major axis of its orbit is the same for all objects orbiting the same primary.\n\n\\(T^2 \\propto R^3\\)\n\nNewton’s Laws: motion and gravitation (1687): Newton’s second law of motion models the relationship between the mass of an object and the force required to accelerate it.\n\n\\(F = ma\\)\n\\(F_g = G \\frac{m_1 m_2}{r^2}\\) \n\n\nProbabilistic models: Models that attempt to understand how random processes evolve. These are more general and can be used to describe many phenomena in the real world. These models commonly make simplifying assumptions about the nature of the world.\n\nPoisson Process models: Used to model random events that happen with some probability at any point in time and are strictly increasing in count, such as the arrival of customers at a store.\n\n\nNote: These specific models are not in the scope of Data 100 and exist to serve as motivation.",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#simple-linear-regression",
-    "href": "intro_to_modeling/intro_to_modeling.html#simple-linear-regression",
-    "title": "10  Introduction to Modeling",
-    "section": "10.2 Simple Linear Regression",
-    "text": "10.2 Simple Linear Regression\nThe regression line is the unique straight line that minimizes the mean squared error of estimation among all straight lines. As with any straight line, it can be defined by a slope and a y-intercept:\n\n\\(\\text{slope} = r \\cdot \\frac{\\text{Standard Deviation of } y}{\\text{Standard Deviation of }x}\\)\n\\(y\\text{-intercept} = \\text{average of }y - \\text{slope}\\cdot\\text{average of }x\\)\n\\(\\text{regression estimate} = y\\text{-intercept} + \\text{slope}\\cdot\\text{}x\\)\n\\(\\text{residual} =\\text{observed }y - \\text{regression estimate}\\)\n\n\n\nCode\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n# Set random seed for consistency \nnp.random.seed(43)\nplt.style.use('default') \n\n# Generate random noise for plotting\nx = np.linspace(-3, 3, 100)\ny = x * 0.5 - 1 + np.random.randn(100) * 0.3\n\n# Plot regression line\nsns.regplot(x=x,y=y);\n\n\n\n\n\n\n\n\n\n\n10.2.1 Notations and Definitions\nFor a pair of variables \\(x\\) and \\(y\\) representing our data \\(\\mathcal{D} = \\{(x_1, y_1), (x_2, y_2), \\dots, (x_n, y_n)\\}\\), we denote their means/averages as \\(\\bar x\\) and \\(\\bar y\\) and standard deviations as \\(\\sigma_x\\) and \\(\\sigma_y\\).\n\n10.2.1.1 Standard Units\nA variable is represented in standard units if the following are true:\n\n0 in standard units is equal to the mean (\\(\\bar{x}\\)) in the original variable’s units.\nAn increase of 1 standard unit is an increase of 1 standard deviation (\\(\\sigma_x\\)) in the original variable’s units.\n\nTo convert a variable \\(x_i\\) into standard units, we subtract its mean from it and divide it by its standard deviation. 
For example, \\(x_i\\) in standard units is \\(\\frac{x_i - \\bar x}{\\sigma_x}\\).\n\n\n10.2.1.2 Correlation\nThe correlation (\\(r\\)) is the average of the product of \\(x\\) and \\(y\\), both measured in standard units.\n\\[r = \\frac{1}{n} \\sum_{i=1}^n (\\frac{x_i - \\bar{x}}{\\sigma_x})(\\frac{y_i - \\bar{y}}{\\sigma_y})\\]\n\nCorrelation measures the strength of a linear association between two variables.\nCorrelations range between -1 and 1: \\(|r| \\leq 1\\), with \\(r=1\\) indicating perfect positive linear association, and \\(r=-1\\) indicating perfect negative association. The closer \\(r\\) is to \\(0\\), the weaker the linear association is.\nCorrelation says nothing about causation and non-linear association. Correlation does not imply causation. When \\(r = 0\\), the two variables are uncorrelated. However, they could still be related through some non-linear relationship.\n\n\n\nCode\ndef plot_and_get_corr(ax, x, y, title):\n    ax.set_xlim(-3, 3)\n    ax.set_ylim(-3, 3)\n    ax.set_xticks([])\n    ax.set_yticks([])\n    ax.scatter(x, y, alpha = 0.73)\n    r = np.corrcoef(x, y)[0, 1]\n    ax.set_title(title + \" (corr: {})\".format(r.round(2)))\n    return r\n\nfig, axs = plt.subplots(2, 2, figsize = (10, 10))\n\n# Just noise\nx1, y1 = np.random.randn(2, 100)\ncorr1 = plot_and_get_corr(axs[0, 0], x1, y1, title = \"noise\")\n\n# Strong linear\nx2 = np.linspace(-3, 3, 100)\ny2 = x2 * 0.5 - 1 + np.random.randn(100) * 0.3\ncorr2 = plot_and_get_corr(axs[0, 1], x2, y2, title = \"strong linear\")\n\n# Unequal spread\nx3 = np.linspace(-3, 3, 100)\ny3 = - x3/3 + np.random.randn(100)*(x3)/2.5\ncorr3 = plot_and_get_corr(axs[1, 0], x3, y3, title = \"strong linear\")\nextent = axs[1, 0].get_window_extent().transformed(fig.dpi_scale_trans.inverted())\n\n# Strong non-linear\nx4 = np.linspace(-3, 3, 100)\ny4 = 2*np.sin(x3 - 1.5) + np.random.randn(100) * 0.3\ncorr4 = plot_and_get_corr(axs[1, 1], x4, y4, title = \"strong 
non-linear\")\n\nplt.show()\n\n\n\n\n\n\n\n\n\n\n\n\n10.2.2 Alternate Form\nWhen the variables \\(y\\) and \\(x\\) are measured in standard units, the regression line for predicting \\(y\\) based on \\(x\\) has slope \\(r\\) and passes through the origin.\n\\[\\hat{y}_{su} = r \\cdot x_{su}\\]\n\n\nIn the original units, this becomes\n\n\\[\\frac{\\hat{y} - \\bar{y}}{\\sigma_y} = r \\cdot \\frac{x - \\bar{x}}{\\sigma_x}\\]\n\n\n\n10.2.3 Derivation\nStarting from the top, we have our claimed form of the regression line, and we want to show that it is equivalent to the optimal linear regression line: \\(\\hat{y} = \\hat{a} + \\hat{b}x\\).\nRecall:\n\n\\(\\hat{b} = r \\cdot \\frac{\\text{Standard Deviation of }y}{\\text{Standard Deviation of }x}\\)\n\\(\\hat{a} = \\text{average of }y - \\text{slope}\\cdot\\text{average of }x\\)\n\n\n\n\n\n\n\nProof:\n\\[\\frac{\\hat{y} - \\bar{y}}{\\sigma_y} = r \\cdot \\frac{x - \\bar{x}}{\\sigma_x}\\]\nMultiply by \\(\\sigma_y\\), and add \\(\\bar{y}\\) on both sides.\n\\[\\hat{y} = \\sigma_y \\cdot r \\cdot \\frac{x - \\bar{x}}{\\sigma_x} + \\bar{y}\\]\nDistribute coefficient \\(\\sigma_{y}\\cdot r\\) to the \\(\\frac{x - \\bar{x}}{\\sigma_x}\\) term\n\\[\\hat{y} = (\\frac{r\\sigma_y}{\\sigma_x} ) \\cdot x + (\\bar{y} - (\\frac{r\\sigma_y}{\\sigma_x} ) \\bar{x})\\]\nWe now see that we have a line that matches our claim:\n\nslope: \\(r\\cdot\\frac{\\text{SD of y}}{\\text{SD of x}} = r\\cdot\\frac{\\sigma_y}{\\sigma_x}\\)\nintercept: \\(\\bar{y} - \\text{slope}\\cdot \\bar{x}\\)\n\nNote that the error for the i-th datapoint is: \\(e_i = y_i - \\hat{y_i}\\)",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#the-modeling-process",
-    "href": "intro_to_modeling/intro_to_modeling.html#the-modeling-process",
-    "title": "10  Introduction to Modeling",
-    "section": "10.3 The Modeling Process",
-    "text": "10.3 The Modeling Process\nAt a high level, a model is a way of representing a system. In Data 100, we’ll treat a model as some mathematical rule we use to describe the relationship between variables.\nWhat variables are we modeling? Typically, we use a subset of the variables in our sample of collected data to model another variable in this data. To put this more formally, say we have the following dataset \\(\\mathcal{D}\\):\n\\[\\mathcal{D} = \\{(x_1, y_1), (x_2, y_2), ..., (x_n, y_n)\\}\\]\nEach pair of values \\((x_i, y_i)\\) represents a datapoint. In a modeling setting, we call these observations. \\(y_i\\) is the dependent variable we are trying to model, also called an output or response. \\(x_i\\) is the independent variable inputted into the model to make predictions, also known as a feature.\nOur goal in modeling is to use the observed data \\(\\mathcal{D}\\) to predict the output variable \\(y_i\\). We denote each prediction as \\(\\hat{y}_i\\) (read: “y hat sub i”).\nHow do we generate these predictions? Some examples of models we’ll encounter in the next few lectures are given below:\n\\[\\hat{y}_i = \\theta\\] \\[\\hat{y}_i = \\theta_0 + \\theta_1 x_i\\]\nThe examples above are known as parametric models. They relate the collected data, \\(x_i\\), to the prediction we make, \\(\\hat{y}_i\\). A few parameters (\\(\\theta\\), \\(\\theta_0\\), \\(\\theta_1\\)) are used to describe the relationship between \\(x_i\\) and \\(\\hat{y}_i\\).\nNotice that we don’t immediately know the values of these parameters. While the features, \\(x_i\\), are taken from our observed data, we need to decide what values to give \\(\\theta\\), \\(\\theta_0\\), and \\(\\theta_1\\) ourselves. 
This is the heart of parametric modeling: what parameter values should we choose so our model makes the best possible predictions?\nTo choose our model parameters, we’ll work through the modeling process.\n\nChoose a model: how should we represent the world?\nChoose a loss function: how do we quantify prediction error?\nFit the model: how do we choose the best parameters of our model given our data?\nEvaluate model performance: how do we evaluate whether this process gave rise to a good model?",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#choosing-a-model",
-    "href": "intro_to_modeling/intro_to_modeling.html#choosing-a-model",
-    "title": "10  Introduction to Modeling",
-    "section": "10.4 Choosing a Model",
-    "text": "10.4 Choosing a Model\nOur first step is choosing a model: defining the mathematical rule that describes the relationship between the features, \\(x_i\\), and predictions \\(\\hat{y}_i\\).\nIn Data 8, you learned about the Simple Linear Regression (SLR) model. You learned that the model takes the form: \\[\\hat{y}_i = a + bx_i\\]\nIn Data 100, we’ll use slightly different notation: we will replace \\(a\\) with \\(\\theta_0\\) and \\(b\\) with \\(\\theta_1\\). This will allow us to use the same notation when we explore more complex models later on in the course.\n\\[\\hat{y}_i = \\theta_0 + \\theta_1 x_i\\]\nThe parameters of the SLR model are \\(\\theta_0\\), also called the intercept term, and \\(\\theta_1\\), also called the slope term. To create an effective model, we want to choose values for \\(\\theta_0\\) and \\(\\theta_1\\) that most accurately predict the output variable. The “best” fitting model parameters are given the special names: \\(\\hat{\\theta}_0\\) and \\(\\hat{\\theta}_1\\); they are the specific parameter values that allow our model to generate the best possible predictions.\nIn Data 8, you learned that the best SLR model parameters are: \\[\\hat{\\theta}_0 = \\bar{y} - \\hat{\\theta}_1\\bar{x} \\qquad \\qquad \\hat{\\theta}_1 = r \\frac{\\sigma_y}{\\sigma_x}\\]\nA quick reminder on notation:\n\n\\(\\bar{y}\\) and \\(\\bar{x}\\) indicate the mean value of \\(y\\) and \\(x\\), respectively\n\\(\\sigma_y\\) and \\(\\sigma_x\\) indicate the standard deviations of \\(y\\) and \\(x\\)\n\\(r\\) is the correlation coefficient, defined as the average of the product of \\(x\\) and \\(y\\) measured in standard units: \\(\\frac{1}{n} \\sum_{i=1}^n (\\frac{x_i-\\bar{x}}{\\sigma_x})(\\frac{y_i-\\bar{y}}{\\sigma_y})\\)\n\nIn Data 100, we want to understand how to derive these best model coefficients. To do so, we’ll introduce the concept of a loss function.",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#choosing-a-loss-function",
-    "href": "intro_to_modeling/intro_to_modeling.html#choosing-a-loss-function",
-    "title": "10  Introduction to Modeling",
-    "section": "10.5 Choosing a Loss Function",
-    "text": "10.5 Choosing a Loss Function\nWe’ve talked about the idea of creating the “best” possible predictions. This begs the question: how do we decide how “good” or “bad” our model’s predictions are?\nA loss function characterizes the cost, error, or fit resulting from a particular choice of model or model parameters. This function, \\(L(y, \\hat{y})\\), quantifies how “bad” or “far off” a single prediction by our model is from a true, observed value in our collected data.\nThe choice of loss function for a particular model will affect the accuracy and computational cost of estimation, and it’ll also depend on the estimation task at hand. For example,\n\nAre outputs quantitative or qualitative?\nDo outliers matter?\nAre all errors equally costly? (e.g., a false negative on a cancer test is arguably more dangerous than a false positive)\n\nRegardless of the specific function used, a loss function should follow two basic principles:\n\nIf the prediction \\(\\hat{y}_i\\) is close to the actual value \\(y_i\\), loss should be low.\nIf the prediction \\(\\hat{y}_i\\) is far from the actual value \\(y_i\\), loss should be high.\n\nTwo common choices of loss function are squared loss and absolute loss.\nSquared loss, also known as L2 loss, computes loss as the square of the difference between the observed \\(y_i\\) and predicted \\(\\hat{y}_i\\): \\[L(y_i, \\hat{y}_i) = (y_i - \\hat{y}_i)^2\\]\nAbsolute loss, also known as L1 loss, computes loss as the absolute difference between the observed \\(y_i\\) and predicted \\(\\hat{y}_i\\): \\[L(y_i, \\hat{y}_i) = |y_i - \\hat{y}_i|\\]\nL1 and L2 loss give us a tool for quantifying our model’s performance on a single data point. This is a good start, but ideally, we want to understand how our model performs across our entire dataset. A natural way to do this is to compute the average loss across all data points in the dataset. 
This is known as the cost function, \\(\\hat{R}(\\theta)\\): \\[\\hat{R}(\\theta) = \\frac{1}{n} \\sum^n_{i=1} L(y_i, \\hat{y}_i)\\]\nThe cost function has many names in the statistics literature. You may also encounter the terms:\n\nEmpirical risk (this is why we give the cost function the name \\(R\\))\nError function\nAverage loss\n\nWe can substitute our L1 and L2 loss into the cost function definition. The Mean Squared Error (MSE) is the average squared loss across a dataset: \\[\\text{MSE} = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)^2\\]\nThe Mean Absolute Error (MAE) is the average absolute loss across a dataset: \\[\\text{MAE}= \\frac{1}{n} \\sum_{i=1}^n |y_i - \\hat{y}_i|\\]",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#fitting-the-model",
-    "href": "intro_to_modeling/intro_to_modeling.html#fitting-the-model",
-    "title": "10  Introduction to Modeling",
-    "section": "10.6 Fitting the Model",
-    "text": "10.6 Fitting the Model\nNow that we’ve established the concept of a loss function, we can return to our original goal of choosing model parameters. Specifically, we want to choose the best set of model parameters that will minimize the model’s cost on our dataset. This process is called fitting the model.\nWe know from calculus that a function is minimized when (1) its first derivative is equal to zero and (2) its second derivative is positive. We often call the function being minimized the objective function (our objective is to find its minimum).\nTo find the optimal model parameter, we:\n\nTake the derivative of the cost function with respect to that parameter\nSet the derivative equal to 0\nSolve for the parameter\n\nWe repeat this process for each parameter present in the model. For now, we’ll disregard the second derivative condition.\nTo help us make sense of this process, let’s put it into action by deriving the optimal model parameters for simple linear regression using the mean squared error as our cost function. Remember: although the notation may look tricky, all we are doing is following the three steps above!\nStep 1: take the derivative of the cost function with respect to each model parameter. We substitute the SLR model, \\(\\hat{y}_i = \\theta_0+\\theta_1 x_i\\), into the definition of MSE above and differentiate with respect to \\(\\theta_0\\) and \\(\\theta_1\\). 
\\[\\text{MSE} = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - \\hat{y}_i)^2 = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - \\theta_0 - \\theta_1 x_i)^2\\]\n\\[\\frac{\\partial}{\\partial \\theta_0} \\text{MSE} = \\frac{-2}{n} \\sum_{i=1}^{n} y_i - \\theta_0 - \\theta_1 x_i\\]\n\\[\\frac{\\partial}{\\partial \\theta_1} \\text{MSE} = \\frac{-2}{n} \\sum_{i=1}^{n} (y_i - \\theta_0 - \\theta_1 x_i)x_i\\]\nLet’s walk through these derivations in more depth, starting with the derivative of MSE with respect to \\(\\theta_0\\).\nGiven our MSE above, we know that: \\[\\frac{\\partial}{\\partial \\theta_0} \\text{MSE} = \\frac{\\partial}{\\partial \\theta_0} \\frac{1}{n} \\sum_{i=1}^{n} {(y_i - \\theta_0 - \\theta_1 x_i)}^{2}\\]\nNoting that the derivative of sum is equivalent to the sum of derivatives, this then becomes: \\[ = \\frac{1}{n} \\sum_{i=1}^{n} \\frac{\\partial}{\\partial \\theta_0} {(y_i - \\theta_0 - \\theta_1 x_i)}^{2}\\]\nWe can then apply the chain rule.\n\\[ = \\frac{1}{n} \\sum_{i=1}^{n} 2 \\cdot{(y_i - \\theta_0 - \\theta_1 x_i)}\\dot(-1)\\]\nFinally, we can simplify the constants, leaving us with our answer.\n\\[\\frac{\\partial}{\\partial \\theta_0} \\text{MSE} = \\frac{-2}{n} \\sum_{i=1}^{n}{(y_i - \\theta_0 - \\theta_1 x_i)}\\]\nFollowing the same procedure, we can take the derivative of MSE with respect to \\(\\theta_1\\).\n\\[\\frac{\\partial}{\\partial \\theta_1} \\text{MSE} = \\frac{\\partial}{\\partial \\theta_1} \\frac{1}{n} \\sum_{i=1}^{n} {(y_i - \\theta_0 - \\theta_1 x_i)}^{2}\\]\n\\[ = \\frac{1}{n} \\sum_{i=1}^{n} \\frac{\\partial}{\\partial \\theta_1} {(y_i - \\theta_0 - \\theta_1 x_i)}^{2}\\]\n\\[ = \\frac{1}{n} \\sum_{i=1}^{n} 2 \\dot{(y_i - \\theta_0 - \\theta_1 x_i)}\\dot(-x_i)\\]\n\\[= \\frac{-2}{n} \\sum_{i=1}^{n} {(y_i - \\theta_0 - \\theta_1 x_i)}x_i\\]\nStep 2: set the derivatives equal to 0. After simplifying terms, this produces two estimating equations. 
The best set of model parameters \\((\\hat{\\theta}_0, \\hat{\\theta}_1)\\) must satisfy these two optimality conditions. \\[0 = \\frac{-2}{n} \\sum_{i=1}^{n} y_i - \\hat{\\theta}_0 - \\hat{\\theta}_1 x_i \\Longleftrightarrow \\frac{1}{n}\\sum_{i=1}^{n} y_i - \\hat{y}_i = 0\\] \\[0 = \\frac{-2}{n} \\sum_{i=1}^{n} (y_i - \\hat{\\theta}_0 - \\hat{\\theta}_1 x_i)x_i \\Longleftrightarrow \\frac{1}{n}\\sum_{i=1}^{n} (y_i - \\hat{y}_i)x_i = 0\\]\nStep 3: solve the estimating equations to compute estimates for \\(\\hat{\\theta}_0\\) and \\(\\hat{\\theta}_1\\).\nTaking the first equation gives the estimate of \\(\\hat{\\theta}_0\\): \\[\\frac{1}{n} \\sum_{i=1}^n y_i - \\hat{\\theta}_0 - \\hat{\\theta}_1 x_i = 0 \\]\n\\[\\left(\\frac{1}{n} \\sum_{i=1}^n y_i \\right) - \\hat{\\theta}_0 - \\hat{\\theta}_1\\left(\\frac{1}{n} \\sum_{i=1}^n x_i \\right) = 0\\]\n\\[ \\hat{\\theta}_0 = \\bar{y} - \\hat{\\theta}_1 \\bar{x}\\]\nWith a bit more maneuvering, the second equation gives the estimate of \\(\\hat{\\theta}_1\\). 
Start by multiplying the first estimating equation by \\(\\bar{x}\\), then subtracting the result from the second estimating equation.\n\\[\\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)x_i - \\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)\\bar{x} = 0 \\]\n\\[\\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)(x_i - \\bar{x}) = 0 \\]\nNext, plug in \\(\\hat{y}_i = \\hat{\\theta}_0 + \\hat{\\theta}_1 x_i = \\bar{y} + \\hat{\\theta}_1(x_i - \\bar{x})\\):\n\\[\\frac{1}{n} \\sum_{i=1}^n (y_i - \\bar{y} - \\hat{\\theta}_1(x - \\bar{x}))(x_i - \\bar{x}) = 0 \\]\n\\[\\frac{1}{n} \\sum_{i=1}^n (y_i - \\bar{y})(x_i - \\bar{x}) = \\hat{\\theta}_1 \\times \\frac{1}{n} \\sum_{i=1}^n (x_i - \\bar{x})^2\n\\]\nBy using the definition of correlation \\(\\left(r = \\frac{1}{n} \\sum_{i=1}^n (\\frac{x_i-\\bar{x}}{\\sigma_x})(\\frac{y_i-\\bar{y}}{\\sigma_y}) \\right)\\) and standard deviation \\(\\left(\\sigma_x = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n (x_i - \\bar{x})^2} \\right)\\), we can conclude: \\[r \\sigma_x \\sigma_y = \\hat{\\theta}_1 \\times \\sigma_x^2\\] \\[\\hat{\\theta}_1 = r \\frac{\\sigma_y}{\\sigma_x}\\]\nJust as was given in Data 8!\nRemember, this derivation found the optimal model parameters for SLR when using the MSE cost function. If we had used a different model or different loss function, we likely would have found different values for the best model parameters. However, regardless of the model and loss used, we can always follow these three steps to fit the model.",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
-  {
-    "objectID": "intro_to_modeling/intro_to_modeling.html#evaluating-the-slr-model",
-    "href": "intro_to_modeling/intro_to_modeling.html#evaluating-the-slr-model",
-    "title": "10  Introduction to Modeling",
-    "section": "10.7 Evaluating the SLR Model",
-    "text": "10.7 Evaluating the SLR Model\nNow that we’ve explored the mathematics behind (1) choosing a model, (2) choosing a loss function, and (3) fitting the model, we’re left with one final question – how “good” are the predictions made by this “best” fitted model? To determine this, we can:\n\nVisualize data and compute statistics:\n\nPlot the original data.\nCompute each column’s mean and standard deviation. If the mean and standard deviation of our predictions are close to those of the original observed \\(y_i\\)’s, we might be inclined to say that our model has done well.\n(If we’re fitting a linear model) Compute the correlation \\(r\\). A large magnitude for the correlation coefficient between the feature and response variables could also indicate that our model has done well.\n\nPerformance metrics:\n\nWe can take the Root Mean Squared Error (RMSE).\n\nIt’s the square root of the mean squared error (MSE), which is the average loss that we’ve been minimizing to determine optimal model parameters.\nRMSE is in the same units as \\(y\\).\nA lower RMSE indicates more “accurate” predictions, as we have a lower “average loss” across the data.\n\n\n\\[\\text{RMSE} = \\sqrt{\\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)^2}\\]\nVisualization:\n\nLook at the residual plot of \\(e_i = y_i - \\hat{y_i}\\) to visualize the difference between actual and predicted values. 
The good residual plot should not show any pattern between input/features \\(x_i\\) and residual values \\(e_i\\).\n\n\nTo illustrate this process, let’s take a look at Anscombe’s quartet.\n\n10.7.1 Four Mysterious Datasets (Anscombe’s quartet)\nLet’s take a look at four different datasets.\n\n\nCode\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline\nimport seaborn as sns\nimport itertools\nfrom mpl_toolkits.mplot3d import Axes3D\n\n\n\n\nCode\n# Big font helper\ndef adjust_fontsize(size=None):\n    SMALL_SIZE = 8\n    MEDIUM_SIZE = 10\n    BIGGER_SIZE = 12\n    if size != None:\n        SMALL_SIZE = MEDIUM_SIZE = BIGGER_SIZE = size\n\n    plt.rc(\"font\", size=SMALL_SIZE)  # controls default text sizes\n    plt.rc(\"axes\", titlesize=SMALL_SIZE)  # fontsize of the axes title\n    plt.rc(\"axes\", labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels\n    plt.rc(\"xtick\", labelsize=SMALL_SIZE)  # fontsize of the tick labels\n    plt.rc(\"ytick\", labelsize=SMALL_SIZE)  # fontsize of the tick labels\n    plt.rc(\"legend\", fontsize=SMALL_SIZE)  # legend fontsize\n    plt.rc(\"figure\", titlesize=BIGGER_SIZE)  # fontsize of the figure title\n\n\n# Helper functions\ndef standard_units(x):\n    return (x - np.mean(x)) / np.std(x)\n\n\ndef correlation(x, y):\n    return np.mean(standard_units(x) * standard_units(y))\n\n\ndef slope(x, y):\n    return correlation(x, y) * np.std(y) / np.std(x)\n\n\ndef intercept(x, y):\n    return np.mean(y) - slope(x, y) * np.mean(x)\n\n\ndef fit_least_squares(x, y):\n    theta_0 = intercept(x, y)\n    theta_1 = slope(x, y)\n    return theta_0, theta_1\n\n\ndef predict(x, theta_0, theta_1):\n    return theta_0 + theta_1 * x\n\n\ndef compute_mse(y, yhat):\n    return np.mean((y - yhat) ** 2)\n\n\nplt.style.use(\"default\")  # Revert style to default mpl\n\n\n\n\nCode\nplt.style.use(\"default\")  # Revert style to default mpl\nNO_VIZ, RESID, RESID_SCATTER = range(3)\n\n\ndef 
least_squares_evaluation(x, y, visualize=NO_VIZ):\n    # statistics\n    print(f\"x_mean : {np.mean(x):.2f}, y_mean : {np.mean(y):.2f}\")\n    print(f\"x_stdev: {np.std(x):.2f}, y_stdev: {np.std(y):.2f}\")\n    print(f\"r = Correlation(x, y): {correlation(x, y):.3f}\")\n\n    # Performance metrics\n    ahat, bhat = fit_least_squares(x, y)\n    yhat = predict(x, ahat, bhat)\n    print(f\"\\theta_0: {ahat:.2f}, \\theta_1: {bhat:.2f}\")\n    print(f\"RMSE: {np.sqrt(compute_mse(y, yhat)):.3f}\")\n\n    # visualization\n    fig, ax_resid = None, None\n    if visualize == RESID_SCATTER:\n        fig, axs = plt.subplots(1, 2, figsize=(8, 3))\n        axs[0].scatter(x, y)\n        axs[0].plot(x, yhat)\n        axs[0].set_title(\"LS fit\")\n        ax_resid = axs[1]\n    elif visualize == RESID:\n        fig = plt.figure(figsize=(4, 3))\n        ax_resid = plt.gca()\n\n    if ax_resid is not None:\n        ax_resid.scatter(x, y - yhat, color=\"red\")\n        ax_resid.plot([4, 14], [0, 0], color=\"black\")\n        ax_resid.set_title(\"Residuals\")\n\n    return fig\n\n\n\n\nCode\n# Load in four different datasets: I, II, III, IV\nx = [10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5]\ny1 = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]\ny2 = [9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74]\ny3 = [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]\nx4 = [8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8]\ny4 = [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]\n\nanscombe = {\n    \"I\": pd.DataFrame(list(zip(x, y1)), columns=[\"x\", \"y\"]),\n    \"II\": pd.DataFrame(list(zip(x, y2)), columns=[\"x\", \"y\"]),\n    \"III\": pd.DataFrame(list(zip(x, y3)), columns=[\"x\", \"y\"]),\n    \"IV\": pd.DataFrame(list(zip(x4, y4)), columns=[\"x\", \"y\"]),\n}\n\n# Plot the scatter plot and line of best fit\nfig, axs = plt.subplots(2, 2, figsize=(10, 10))\n\nfor i, dataset in enumerate([\"I\", \"II\", \"III\", \"IV\"]):\n    ans = 
anscombe[dataset]\n    x, y = ans[\"x\"], ans[\"y\"]\n    ahat, bhat = fit_least_squares(x, y)\n    yhat = predict(x, ahat, bhat)\n    axs[i // 2, i % 2].scatter(x, y, alpha=0.6, color=\"red\")  # plot the x, y points\n    axs[i // 2, i % 2].plot(x, yhat)  # plot the line of best fit\n    axs[i // 2, i % 2].set_xlabel(f\"$x_{i+1}$\")\n    axs[i // 2, i % 2].set_ylabel(f\"$y_{i+1}$\")\n    axs[i // 2, i % 2].set_title(f\"Dataset {dataset}\")\n\nplt.show()\n\n\n\n\n\n\n\n\n\nWhile these four sets of datapoints look very different, they actually all have identical means \\(\\bar x\\), \\(\\bar y\\), standard deviations \\(\\sigma_x\\), \\(\\sigma_y\\), correlation \\(r\\), and RMSE! If we only look at these statistics, we would probably be inclined to say that these datasets are similar.\n\n\nCode\nfor dataset in [\"I\", \"II\", \"III\", \"IV\"]:\n    print(f\"&gt;&gt;&gt; Dataset {dataset}:\")\n    ans = anscombe[dataset]\n    fig = least_squares_evaluation(ans[\"x\"], ans[\"y\"], visualize=NO_VIZ)\n    print()\n    print()\n\n\n&gt;&gt;&gt; Dataset I:\nx_mean : 9.00, y_mean : 7.50\nx_stdev: 3.16, y_stdev: 1.94\nr = Correlation(x, y): 0.816\n    heta_0: 3.00,   heta_1: 0.50\nRMSE: 1.119\n\n\n&gt;&gt;&gt; Dataset II:\nx_mean : 9.00, y_mean : 7.50\nx_stdev: 3.16, y_stdev: 1.94\nr = Correlation(x, y): 0.816\n    heta_0: 3.00,   heta_1: 0.50\nRMSE: 1.119\n\n\n&gt;&gt;&gt; Dataset III:\nx_mean : 9.00, y_mean : 7.50\nx_stdev: 3.16, y_stdev: 1.94\nr = Correlation(x, y): 0.816\n    heta_0: 3.00,   heta_1: 0.50\nRMSE: 1.118\n\n\n&gt;&gt;&gt; Dataset IV:\nx_mean : 9.00, y_mean : 7.50\nx_stdev: 3.16, y_stdev: 1.94\nr = Correlation(x, y): 0.817\n    heta_0: 3.00,   heta_1: 0.50\nRMSE: 1.118\n\n\n\n\nWe may also wish to visualize the model’s residuals, defined as the difference between the observed and predicted \\(y_i\\) value (\\(e_i = y_i - \\hat{y}_i\\)). This gives a high-level view of how “off” each prediction is from the true observed value. 
Recall that you explored this concept in Data 8: a good regression fit should display no clear pattern in its plot of residuals. The residual plots for Anscombe’s quartet are displayed below. Note how only the first plot shows no clear pattern to the magnitude of residuals. This is an indication that SLR is not the best choice of model for the remaining three sets of points.\n\n\n\nCode\n# Residual visualization\nfig, axs = plt.subplots(2, 2, figsize=(10, 10))\n\nfor i, dataset in enumerate([\"I\", \"II\", \"III\", \"IV\"]):\n    ans = anscombe[dataset]\n    x, y = ans[\"x\"], ans[\"y\"]\n    ahat, bhat = fit_least_squares(x, y)\n    yhat = predict(x, ahat, bhat)\n    axs[i // 2, i % 2].scatter(\n        x, y - yhat, alpha=0.6, color=\"red\"\n    )  # plot the x, y points\n    axs[i // 2, i % 2].plot(\n        x, np.zeros_like(x), color=\"black\"\n    )  # plot the residual line\n    axs[i // 2, i % 2].set_xlabel(f\"$x_{i+1}$\")\n    axs[i // 2, i % 2].set_ylabel(f\"$e_{i+1}$\")\n    axs[i // 2, i % 2].set_title(f\"Dataset {dataset} Residuals\")\n\nplt.show()",
-    "crumbs": [
-      "<span class='chapter-number'>10</span>  <span class='chapter-title'>Introduction to Modeling</span>"
-    ]
-  },
   {
     "objectID": "constant_model_loss_transformations/loss_transformations.html",
     "href": "constant_model_loss_transformations/loss_transformations.html",
@@ -774,7 +694,17 @@
     "href": "gradient_descent/gradient_descent.html",
     "title": "13  sklearn and Gradient Descent",
     "section": "",
-    "text": "13.1 sklearn",
+    "text": "13.1 OLS Recap",
+    "crumbs": [
+      "<span class='chapter-number'>13</span>  <span class='chapter-title'>sklearn and Gradient Descent</span>"
+    ]
+  },
+  {
+    "objectID": "gradient_descent/gradient_descent.html#ols-recap",
+    "href": "gradient_descent/gradient_descent.html#ols-recap",
+    "title": "13  sklearn and Gradient Descent",
+    "section": "",
+    "text": "13.1.1 1. Choose a model\nRecall that when using multiple linear regression, we can generate a prediction for each of our \\(n\\) data points:\n\\[\\hat{y} =\\theta_{0} + \\theta_{1}x_{1} + \\theta_{2}x_{2} + ... + \\theta_{p}x_{p}\\]\n\n\n\n\n\n\n\n\n\nIn the previous lecture, we used p+1 features to account for the intercept, \\(\\theta_0\\). This makes slides and notation messy.\nLet’s redefine p as the number of columns in our covariate matrix and add a column of 1s to encode the intercept (if desired). If we choose to add a column of 1s, then \\(x_1\\) can be a 1 for every data point.\n\\[\\hat{y} =\\theta_{1}x_{1} + \\theta_{2}x_{2} + ... + \\theta_{p}x_{p}\\]\n\n\n\n\n\n\n\n\n\n\n\n13.1.2 2. Choose a loss function\nRecall that we then choose the mean squared error loss function shown below where the prediction vector \\(\\hat{\\mathbb{Y}}\\) depends on \\(\\theta\\). \\[R(\\theta) = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\hat{y}_i)^2 = \\frac{1}{n} (||\\mathbb{Y} - \\hat{\\mathbb{Y}}||_2)^2\\]\n\n\n13.1.3 3. Fit the model\nWe can then minimize the average loss with calculus or geometry. See the previous lecture for a derivation on the Normal Equation (\\(\\mathbb{X}^T \\mathbb{X} \\hat{\\theta} = \\mathbb{X}^T \\mathbb{Y}\\)) using geometry. We can see what the matrices look like with our new interpretation where \\(\\mathbb{X}\\) is now an \\(n\\) by \\(p\\) matrix instead of an \\(n\\) by \\(p+1\\) matrix.\n\n\n\n\n\n\n\n\n\nTo summarize:\n\n\n\n\n\n\n\n\n\n\nModel\nEstimate\nUnique?\n\n\n\n\nConstant Model + MSE\n\\(\\hat{y} = \\theta_0\\)\n\\(\\hat{\\theta}_0 = mean(y) = \\bar{y}\\)\nYes. Any set of values has a unique mean.\n\n\nConstant Model + MAE\n\\(\\hat{y} = \\theta_0\\)\n\\(\\hat{\\theta}_0 = median(y)\\)\nYes, if odd. No, if even. 
Return the average of the middle 2 values.\n\n\nSimple Linear Regression + MSE\n\\(\\hat{y} = \\theta_0 + \\theta_1x\\)\n\\(\\hat{\\theta}_0 = \\bar{y} - \\hat{\\theta}_1\\bar{x}\\) \\(\\hat{\\theta}_1 = r\\frac{\\sigma_y}{\\sigma_x}\\)\nYes. Any set of non-constant* values has a unique mean, SD, and correlation coefficient.\n\n\nOLS (Linear Model + MSE)\n\\(\\mathbb{\\hat{Y}} = \\mathbb{X}\\mathbb{\\theta}\\)\n\\(\\hat{\\theta} = (\\mathbb{X}^T\\mathbb{X})^{-1}\\mathbb{X}^T\\mathbb{Y}\\)\nYes, if \\(\\mathbb{X}\\) is full column rank (all columns are linearly independent, # of datapoints &gt;&gt;&gt; # of features).\n\n\n\n\n13.1.3.1 Uniqueness of a Solution\nIn most settings, the number of observations (\\(n\\)) is much greater than the number of features (\\(p\\)). Note that at least one solution always exists because intuitively, we can always draw a line of best fit for a given set of data, but there may be multiple lines that are “equally good”. (Formal proof is beyond this course.) Let’s now revisit the interpretation for uniqueness of a solution at the end of the last lecture, but with the new notation of \\(p\\) instead of \\(p+1\\) features.\nThe Least Squares estimate \\(\\hat{\\theta}\\) is unique if and only if \\(\\mathbb{X}\\) is full column rank.\n\n\n\n\n\n\nProof:\n\nWe know the solution to the normal equation \\(\\mathbb{X}^T\\mathbb{X}\\hat{\\theta} = \\mathbb{X}^T\\mathbb{Y}\\) is the least square estimate that minimizes the squared loss.\n\\(\\hat{\\theta}\\) has a unique solution \\(\\iff\\) the square matrix \\(\\mathbb{X}^T\\mathbb{X}\\) is invertible \\(\\iff\\) \\(\\mathbb{X}^T\\mathbb{X}\\) is full rank.\n\nThe column rank of a square matrix is the max number of linearly independent columns it contains.\nAn \\(n\\) x \\(n\\) square matrix is deemed full column rank when all of its columns are linearly independent. 
That is, its rank would be equal to \\(n\\).\n\\(\\mathbb{X}^T\\mathbb{X}\\) has shape \\(p \\times p\\), and therefore has max rank \\(p\\).\n\n\\(rank(\\mathbb{X}^T\\mathbb{X})\\) = \\(rank(\\mathbb{X})\\) (proof out of scope).\nTherefore, \\(\\mathbb{X}^T\\mathbb{X}\\) has rank \\(p\\) \\(\\iff\\) \\(\\mathbb{X}\\) has rank \\(p\\) \\(\\iff \\mathbb{X}\\) is full column rank.\n\n\n\n\nTherefore, if \\(\\mathbb{X}\\) is not full column rank, we will not have unique estimates. This can happen for two major reasons.\n\nIf our design matrix \\(\\mathbb{X}\\) is “wide”:\n\nIf n &lt; p, then we have more features (columns) than observations (rows).\nThen \\(rank(\\mathbb{X})\\) ≤ min(n, p) = n &lt; p, so \\(\\hat{\\theta}\\) is not unique.\nTypically we have n &gt;&gt; p so this is less of an issue.\n\nIf our design matrix \\(\\mathbb{X}\\) has features that are linear combinations of other features:\n\nBy definition, the rank of \\(\\mathbb{X}\\) is the number of linearly independent columns in \\(\\mathbb{X}\\).\nExample: If “Width”, “Height”, and “Perimeter” are all columns,\n\nPerimeter = 2 * Width + 2 * Height \\(\\rightarrow\\) \\(\\mathbb{X}\\) is not full rank.\n\nImportant with one-hot encoding (to discuss later).\n\n\nLet’s now explore how to use the normal equations with a real-world dataset in the next section.",
     "crumbs": [
       "<span class='chapter-number'>13</span>  <span class='chapter-title'>sklearn and Gradient Descent</span>"
     ]
@@ -783,8 +713,8 @@
     "objectID": "gradient_descent/gradient_descent.html#sklearn",
     "href": "gradient_descent/gradient_descent.html#sklearn",
     "title": "13  sklearn and Gradient Descent",
-    "section": "",
-    "text": "13.1.1 Implementing Derived Formulas in Code\nThroughout this lecture, we’ll refer to the penguins dataset.\n\n\nCode\nimport pandas as pd\nimport seaborn as sns\nimport numpy as np\n\npenguins = sns.load_dataset(\"penguins\")\npenguins = penguins[penguins[\"species\"] == \"Adelie\"].dropna()\npenguins.head()\n\n\n\n\n\n\n\n\n\nspecies\nisland\nbill_length_mm\nbill_depth_mm\nflipper_length_mm\nbody_mass_g\nsex\n\n\n\n\n0\nAdelie\nTorgersen\n39.1\n18.7\n181.0\n3750.0\nMale\n\n\n1\nAdelie\nTorgersen\n39.5\n17.4\n186.0\n3800.0\nFemale\n\n\n2\nAdelie\nTorgersen\n40.3\n18.0\n195.0\n3250.0\nFemale\n\n\n4\nAdelie\nTorgersen\n36.7\n19.3\n193.0\n3450.0\nFemale\n\n\n5\nAdelie\nTorgersen\n39.3\n20.6\n190.0\n3650.0\nMale\n\n\n\n\n\n\n\nOur goal will be to predict the value of the \"bill_depth_mm\" for a particular penguin given its \"flipper_length_mm\" and \"body_mass_g\". We’ll also add a bias column of all ones to represent the intercept term of our models.\n\n# Add a bias column of all ones to `penguins`\npenguins[\"bias\"] = np.ones(len(penguins), dtype=int) \n\n# Define the design matrix, X...\n# Note that we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form\nX = penguins[[\"bias\", \"flipper_length_mm\", \"body_mass_g\"]].to_numpy()\n\n# ...as well as the target variable, Y\n# Again, we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form\nY = penguins[[\"bill_depth_mm\"]].to_numpy()\n\nIn the lecture on ordinary least squares, we expressed multiple linear regression using matrix notation.\n\\[\\hat{\\mathbb{Y}} = \\mathbb{X}\\theta\\]\nWe used a geometric approach to derive the following expression for the optimal model parameters:\n\\[\\hat{\\theta} = (\\mathbb{X}^T \\mathbb{X})^{-1}\\mathbb{X}^T \\mathbb{Y}\\]\nThat’s a whole lot of matrix manipulation. 
How do we implement it in python?\nThere are three operations we need to perform here: multiplying matrices, taking transposes, and finding inverses.\n\nTo perform matrix multiplication, use the @ operator\nTo take a transpose, call the .T attribute of an NumPy array or DataFrame\nTo compute an inverse, use NumPy’s in-built method np.linalg.inv\n\nPutting this all together, we can compute the OLS estimate for the optimal model parameters, stored in the array theta_hat.\n\ntheta_hat = np.linalg.inv(X.T @ X) @ X.T @ Y\ntheta_hat\n\narray([[1.10029953e+01],\n       [9.82848689e-03],\n       [1.47749591e-03]])\n\n\nTo make predictions using our optimized parameter values, we matrix-multiply the design matrix with the parameter vector:\n\\[\\hat{\\mathbb{Y}} = \\mathbb{X}\\theta\\]\n\nY_hat = X @ theta_hat\npd.DataFrame(Y_hat).head()\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\n18.322561\n\n\n1\n18.445578\n\n\n2\n17.721412\n\n\n3\n17.997254\n\n\n4\n18.263268\n\n\n\n\n\n\n\n\n\n13.1.2 The sklearn Workflow\nWe’ve already saved a lot of time (and avoided tedious calculations) by translating our derived formulas into code. However, we still had to go through the process of writing out the linear algebra ourselves.\nTo make life even easier, we can turn to the sklearn python library. sklearn is a robust library of machine learning tools used extensively in research and industry. It is the standard for simple machine learning tasks and gives us a wide variety of in-built modeling frameworks and methods, so we’ll keep returning to sklearn techniques as we progress through Data 100.\nRegardless of the specific type of model being implemented, sklearn follows a standard set of steps for creating a model:\n\nImport the LinearRegression model from sklearn\nfrom sklearn.linear_model import LinearRegression\nCreate a model object. This generates a new instance of the model class. You can think of it as making a new “copy” of a standard “template” for a model. 
In code, this looks like:\nmy_model = LinearRegression()\nFit the model to the X design matrix and Y target vector. This calculates the optimal model parameters “behind the scenes” without us explicitly working through the calculations ourselves. The fitted parameters are then stored within the model for use in future predictions:\nmy_model.fit(X, Y)\nUse the fitted model to make predictions on the X input data using .predict.\nmy_model.predict(X)\n\nTo extract the fitted parameters, we can use:\nmy_model.coef_\n\nmy_model.intercept_\nLet’s put this into action with our multiple regression task!\n1. Initialize an instance of the model class\nsklearn stores “templates” of useful models for machine learning. We begin the modeling process by making a “copy” of one of these templates for our own use. Model initialization looks like ModelClass(), where ModelClass is the type of model we wish to create.\nFor now, let’s create a linear regression model using LinearRegression.\nmy_model is now an instance of the LinearRegression class. You can think of it as the “idea” of a linear regression model. We haven’t trained it yet, so it doesn’t know any model parameters and cannot be used to make predictions. In fact, we haven’t even told it what data to use for modeling! It simply waits for further instructions.\n\nmy_model = LinearRegression()\n\n2. Train the model using .fit\nBefore the model can make predictions, we will need to fit it to our training data. When we fit the model, sklearn will run gradient descent behind the scenes to determine the optimal model parameters. It will then save these model parameters to our model instance for future use.\nAll sklearn model classes include a .fit method, which is used to fit the model. It takes in two inputs: the design matrix, X, and the target variable, Y.\nLet’s start by fitting a model with just one feature: the flipper length. 
We create a design matrix X by pulling out the \"flipper_length_mm\" column from the DataFrame.\n\n# .fit expects a 2D data design matrix, so we use double brackets to extract a DataFrame\nX = penguins[[\"flipper_length_mm\"]]\nY = penguins[\"bill_depth_mm\"]\n\nmy_model.fit(X, Y)\n\nLinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.  LinearRegression?Documentation for LinearRegressioniFittedLinearRegression() \n\n\nNotice that we use double brackets to extract this column. Why double brackets instead of just single brackets? The .fit method, by default, expects to receive 2-dimensional data – some kind of data that includes both rows and columns. Writing penguins[\"flipper_length_mm\"] would return a 1D Series, causing sklearn to error. We avoid this by writing penguins[[\"flipper_length_mm\"]] to produce a 2D DataFrame.\nAnd in just three lines of code, our model has run gradient descent to determine the optimal model parameters! Our single-feature model takes the form:\n\\[\\text{bill depth} = \\theta_0 + \\theta_1 \\text{flipper length}\\]\nNote that LinearRegression will automatically include an intercept term.\nThe fitted model parameters are stored as attributes of the model instance. my_model.intercept_ will return the value of \\(\\hat{\\theta}_0\\) as a scalar. my_model.coef_ will return all values \\(\\hat{\\theta}_1,\n\\hat{\\theta}_1, ...\\) in an array. Because our model only contains one feature, we see just the value of \\(\\hat{\\theta}_1\\) in the cell below.\n\n# The intercept term, theta_0\nmy_model.intercept_\n\nnp.float64(7.297305899612313)\n\n\n\n# All parameters theta_1, ..., theta_p\nmy_model.coef_\n\narray([0.05812622])\n\n\n3. Use the fitted model to make predictions\nNow that the model has been trained, we can use it to make predictions! 
To do so, we use the .predict method. .predict takes in one argument: the design matrix that should be used to generate predictions. To understand how the model performs on the training set, we would pass in the training data. Alternatively, to make predictions on unseen data, we would pass in a new dataset that wasn’t used to train the model.\nBelow, we call .predict to generate model predictions on the original training data. As before, we use double brackets to ensure that we extract 2-dimensional data.\n\nY_hat_one_feature = my_model.predict(penguins[[\"flipper_length_mm\"]])\n\nprint(f\"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_one_feature)**2))}\")\n\nThe RMSE of the model is 1.154936309923901\n\n\nWhat if we wanted a model with two features?\n\\[\\text{bill depth} = \\theta_0 + \\theta_1 \\text{flipper length} + \\theta_2 \\text{body mass}\\]\nWe repeat this three-step process by intializing a new model object, then calling .fit and .predict as before.\n\n# Step 1: initialize LinearRegression model\ntwo_feature_model = LinearRegression()\n\n# Step 2: fit the model\nX_two_features = penguins[[\"flipper_length_mm\", \"body_mass_g\"]]\nY = penguins[\"bill_depth_mm\"]\n\ntwo_feature_model.fit(X_two_features, Y)\n\n# Step 3: make predictions\nY_hat_two_features = two_feature_model.predict(X_two_features)\n\nprint(f\"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_two_features)**2))}\")\n\nThe RMSE of the model is 0.9881331104079043\n\n\nWe can also see that we obtain the same predictions using sklearn as we did when applying the ordinary least squares formula before!\n\n\nCode\npd.DataFrame({\"Y_hat from OLS\":np.squeeze(Y_hat), \"Y_hat from sklearn\":Y_hat_two_features}).head()\n\n\n\n\n\n\n\n\n\nY_hat from OLS\nY_hat from sklearn\n\n\n\n\n0\n18.322561\n18.322561\n\n\n1\n18.445578\n18.445578\n\n\n2\n17.721412\n17.721412\n\n\n3\n17.997254\n17.997254\n\n\n4\n18.263268\n18.263268",
+    "section": "13.2 sklearn",
+    "text": "13.2 sklearn\n\n13.2.1 Implementing Derived Formulas in Code\nThroughout this lecture, we’ll refer to the penguins dataset.\n\n\nCode\nimport pandas as pd\nimport seaborn as sns\nimport numpy as np\n\npenguins = sns.load_dataset(\"penguins\")\npenguins = penguins[penguins[\"species\"] == \"Adelie\"].dropna()\npenguins.head()\n\n\n\n\n\n\n\n\n\nspecies\nisland\nbill_length_mm\nbill_depth_mm\nflipper_length_mm\nbody_mass_g\nsex\n\n\n\n\n0\nAdelie\nTorgersen\n39.1\n18.7\n181.0\n3750.0\nMale\n\n\n1\nAdelie\nTorgersen\n39.5\n17.4\n186.0\n3800.0\nFemale\n\n\n2\nAdelie\nTorgersen\n40.3\n18.0\n195.0\n3250.0\nFemale\n\n\n4\nAdelie\nTorgersen\n36.7\n19.3\n193.0\n3450.0\nFemale\n\n\n5\nAdelie\nTorgersen\n39.3\n20.6\n190.0\n3650.0\nMale\n\n\n\n\n\n\n\nOur goal will be to predict the value of the \"bill_depth_mm\" for a particular penguin given its \"flipper_length_mm\" and \"body_mass_g\". We’ll also add a bias column of all ones to represent the intercept term of our models.\n\n# Add a bias column of all ones to `penguins`\npenguins[\"bias\"] = np.ones(len(penguins), dtype=int) \n\n# Define the design matrix, X...\n# Note that we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form\nX = penguins[[\"bias\", \"flipper_length_mm\", \"body_mass_g\"]].to_numpy()\n\n# ...as well as the target variable, Y\n# Again, we use .to_numpy() to convert our DataFrame into a NumPy array so it is in Matrix form\nY = penguins[[\"bill_depth_mm\"]].to_numpy()\n\nIn the lecture on ordinary least squares, we expressed multiple linear regression using matrix notation.\n\\[\\hat{\\mathbb{Y}} = \\mathbb{X}\\theta\\]\nWe used a geometric approach to derive the following expression for the optimal model parameters:\n\\[\\hat{\\theta} = (\\mathbb{X}^T \\mathbb{X})^{-1}\\mathbb{X}^T \\mathbb{Y}\\]\nThat’s a whole lot of matrix manipulation. 
How do we implement it in Python?\nThere are three operations we need to perform here: multiplying matrices, taking transposes, and finding inverses.\n\nTo perform matrix multiplication, use the @ operator\nTo take a transpose, call the .T attribute of a NumPy array or DataFrame\nTo compute an inverse, use NumPy’s in-built method np.linalg.inv\n\nPutting this all together, we can compute the OLS estimate for the optimal model parameters, stored in the array theta_hat.\n\ntheta_hat = np.linalg.inv(X.T @ X) @ X.T @ Y\ntheta_hat\n\narray([[1.10029953e+01],\n       [9.82848689e-03],\n       [1.47749591e-03]])\n\n\nTo make predictions using our optimized parameter values, we matrix-multiply the design matrix with the parameter vector:\n\\[\\hat{\\mathbb{Y}} = \\mathbb{X}\\theta\\]\n\nY_hat = X @ theta_hat\npd.DataFrame(Y_hat).head()\n\n\n\n\n\n\n\n\n0\n\n\n\n\n0\n18.322561\n\n\n1\n18.445578\n\n\n2\n17.721412\n\n\n3\n17.997254\n\n\n4\n18.263268\n\n\n\n\n\n\n\n\n\n13.2.2 The sklearn Workflow\nWe’ve already saved a lot of time (and avoided tedious calculations) by translating our derived formulas into code. However, we still had to go through the process of writing out the linear algebra ourselves.\nTo make life even easier, we can turn to the sklearn Python library. sklearn is a robust library of machine learning tools used extensively in research and industry. It is the standard for simple machine learning tasks and gives us a wide variety of in-built modeling frameworks and methods, so we’ll keep returning to sklearn techniques as we progress through Data 100.\nRegardless of the specific type of model being implemented, sklearn follows a standard set of steps for creating a model:\n\nImport the LinearRegression model from sklearn\nfrom sklearn.linear_model import LinearRegression\nCreate a model object. This generates a new instance of the model class. You can think of it as making a new “copy” of a standard “template” for a model. 
In code, this looks like:\nmy_model = LinearRegression()\nFit the model to the X design matrix and Y target vector. This calculates the optimal model parameters “behind the scenes” without us explicitly working through the calculations ourselves. The fitted parameters are then stored within the model for use in future predictions:\nmy_model.fit(X, Y)\nUse the fitted model to make predictions on the X input data using .predict.\nmy_model.predict(X)\n\nTo extract the fitted parameters, we can use:\nmy_model.coef_\n\nmy_model.intercept_\nLet’s put this into action with our multiple regression task!\n1. Initialize an instance of the model class\nsklearn stores “templates” of useful models for machine learning. We begin the modeling process by making a “copy” of one of these templates for our own use. Model initialization looks like ModelClass(), where ModelClass is the type of model we wish to create.\nFor now, let’s create a linear regression model using LinearRegression.\nmy_model is now an instance of the LinearRegression class. You can think of it as the “idea” of a linear regression model. We haven’t trained it yet, so it doesn’t know any model parameters and cannot be used to make predictions. In fact, we haven’t even told it what data to use for modeling! It simply waits for further instructions.\n\nmy_model = LinearRegression()\n\n2. Train the model using .fit\nBefore the model can make predictions, we will need to fit it to our training data. When we fit the model, sklearn will solve the least squares problem behind the scenes to determine the optimal model parameters. It will then save these model parameters to our model instance for future use.\nAll sklearn model classes include a .fit method, which is used to fit the model. It takes in two inputs: the design matrix, X, and the target variable, Y.\nLet’s start by fitting a model with just one feature: the flipper length. 
We create a design matrix X by pulling out the \"flipper_length_mm\" column from the DataFrame.\n\n# .fit expects a 2D data design matrix, so we use double brackets to extract a DataFrame\nX = penguins[[\"flipper_length_mm\"]]\nY = penguins[\"bill_depth_mm\"]\n\nmy_model.fit(X, Y)\n\nLinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.  LinearRegression?Documentation for LinearRegressioniFittedLinearRegression() \n\n\nNotice that we use double brackets to extract this column. Why double brackets instead of just single brackets? The .fit method, by default, expects to receive 2-dimensional data – some kind of data that includes both rows and columns. Writing penguins[\"flipper_length_mm\"] would return a 1D Series, causing sklearn to error. We avoid this by writing penguins[[\"flipper_length_mm\"]] to produce a 2D DataFrame.\nAnd in just three lines of code, our model has solved the least squares problem to determine the optimal model parameters! Our single-feature model takes the form:\n\\[\\text{bill depth} = \\theta_0 + \\theta_1 \\text{flipper length}\\]\nNote that LinearRegression will automatically include an intercept term.\nThe fitted model parameters are stored as attributes of the model instance. my_model.intercept_ will return the value of \\(\\hat{\\theta}_0\\) as a scalar. my_model.coef_ will return all values \\(\\hat{\\theta}_1,\n\\hat{\\theta}_2, ...\\) in an array. Because our model only contains one feature, we see just the value of \\(\\hat{\\theta}_1\\) in the cell below.\n\n# The intercept term, theta_0\nmy_model.intercept_\n\nnp.float64(7.297305899612313)\n\n\n\n# All parameters theta_1, ..., theta_p\nmy_model.coef_\n\narray([0.05812622])\n\n\n3. Use the fitted model to make predictions\nNow that the model has been trained, we can use it to make predictions! 
To do so, we use the .predict method. .predict takes in one argument: the design matrix that should be used to generate predictions. To understand how the model performs on the training set, we would pass in the training data. Alternatively, to make predictions on unseen data, we would pass in a new dataset that wasn’t used to train the model.\nBelow, we call .predict to generate model predictions on the original training data. As before, we use double brackets to ensure that we extract 2-dimensional data.\n\nY_hat_one_feature = my_model.predict(penguins[[\"flipper_length_mm\"]])\n\nprint(f\"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_one_feature)**2))}\")\n\nThe RMSE of the model is 1.154936309923901\n\n\nWhat if we wanted a model with two features?\n\\[\\text{bill depth} = \\theta_0 + \\theta_1 \\text{flipper length} + \\theta_2 \\text{body mass}\\]\nWe repeat this three-step process by initializing a new model object, then calling .fit and .predict as before.\n\n# Step 1: initialize LinearRegression model\ntwo_feature_model = LinearRegression()\n\n# Step 2: fit the model\nX_two_features = penguins[[\"flipper_length_mm\", \"body_mass_g\"]]\nY = penguins[\"bill_depth_mm\"]\n\ntwo_feature_model.fit(X_two_features, Y)\n\n# Step 3: make predictions\nY_hat_two_features = two_feature_model.predict(X_two_features)\n\nprint(f\"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_two_features)**2))}\")\n\nThe RMSE of the model is 0.9881331104079043\n\n\nWe can also see that we obtain the same predictions using sklearn as we did when applying the ordinary least squares formula before!\n\n\nCode\npd.DataFrame({\"Y_hat from OLS\":np.squeeze(Y_hat), \"Y_hat from sklearn\":Y_hat_two_features}).head()\n\n\n\n\n\n\n\n\n\nY_hat from OLS\nY_hat from sklearn\n\n\n\n\n0\n18.322561\n18.322561\n\n\n1\n18.445578\n18.445578\n\n\n2\n17.721412\n17.721412\n\n\n3\n17.997254\n17.997254\n\n\n4\n18.263268\n18.263268",
     "crumbs": [
       "<span class='chapter-number'>13</span>  <span class='chapter-title'>sklearn and Gradient Descent</span>"
     ]
@@ -793,8 +723,8 @@
     "objectID": "gradient_descent/gradient_descent.html#gradient-descent",
     "href": "gradient_descent/gradient_descent.html#gradient-descent",
     "title": "13  sklearn and Gradient Descent",
-    "section": "13.2 Gradient Descent",
-    "text": "13.2 Gradient Descent\nAt this point, we’ve grown quite familiar with the process of choosing a model and a corresponding loss function and optimizing parameters by choosing the values of \\(\\theta\\) that minimize the loss function. So far, we’ve optimized \\(\\theta\\) by\n\nUsing calculus to take the derivative of the loss function with respect to \\(\\theta\\), setting it equal to 0, and solving for \\(\\theta\\).\nUsing the geometric argument of orthogonality to derive the OLS solution \\(\\hat{\\theta} = (\\mathbb{X}^T \\mathbb{X})^{-1}\\mathbb{X}^T \\mathbb{Y}\\).\n\nOne thing to note, however, is that the techniques we used above can only be applied if we make some big assumptions. For the calculus approach, we assumed that the loss function was differentiable at all points and that we could algebraically solve for the zero points of the derivative; for the geometric approach, OLS only applies when using a linear model with MSE loss. What happens when we have more complex models with different, more complex loss functions? The techniques we’ve learned so far will not work, so we need a new optimization technique: gradient descent.\n\nBIG IDEA: use an iterative algorithm to numerically compute the minimum of the loss.\n\n\n13.2.1 Minimizing an Arbitrary 1D Function\nLet’s consider an arbitrary function. Our goal is to find the value of \\(x\\) that minimizes this function.\n\ndef arbitrary(x):\n    return (x**4 - 15*x**3 + 80*x**2 - 180*x + 144)/10\n\n\n\n13.2.1.1 The Naive Approach: Guess and Check\nAbove, we saw that the minimum is somewhere around 5.3. Let’s see if we can figure out how to find the exact minimum algorithmically from scratch. 
One very slow (and terrible) way would be manual guess-and-check.\n\narbitrary(6)\n\n0.0\n\n\nA somewhat better (but still slow) approach is to use brute force to try out a bunch of x values and return the one that yields the lowest loss.\n\ndef simple_minimize(f, xs):\n    # Takes in a function f and a set of values xs. \n    # Calculates the value of the function f at all values x in xs\n    # Takes the minimum value of f(x) and returns the corresponding value x \n    y = [f(x) for x in xs]  \n    return xs[np.argmin(y)]\n\nguesses = [5.3, 5.31, 5.32, 5.33, 5.34, 5.35]\nsimple_minimize(arbitrary, guesses)\n\n5.33\n\n\nThis process is essentially the same as before where we made a graphical plot, it’s just that we’re only looking at 20 selected points.\n\n\nCode\nxs = np.linspace(1, 7, 200)\nsparse_xs = np.linspace(1, 7, 5)\n\nys = arbitrary(xs)\nsparse_ys = arbitrary(sparse_xs)\n\nfig = px.line(x = xs, y = arbitrary(xs))\nfig.add_scatter(x = sparse_xs, y = arbitrary(sparse_xs), mode = \"markers\")\nfig.update_layout(showlegend= False)\nfig.update_layout(autosize=False, width=800, height=600)\nfig.show()\n\n\n                                                \n\n\nThis basic approach suffers from three major flaws:\n\nIf the minimum is outside our range of guesses, the answer will be completely wrong.\nEven if our range of guesses is correct, if the guesses are too coarse, our answer will be inaccurate.\nIt is very computationally inefficient, considering potentially vast numbers of guesses that are useless.\n\n\n\n13.2.1.2 Scipy.optimize.minimize\nOne way to minimize this mathematical function is to use the scipy.optimize.minimize function. 
It takes a function and a starting guess and tries to find the minimum.\n\nfrom scipy.optimize import minimize\n\n# takes a function f and a starting point x0 and returns a readout \n# with the optimal input value of x which minimizes f\nminimize(arbitrary, x0 = 3.5)\n\n  message: Optimization terminated successfully.\n  success: True\n   status: 0\n      fun: -0.13827491292966557\n        x: [ 2.393e+00]\n      nit: 3\n      jac: [ 6.486e-06]\n hess_inv: [[ 7.385e-01]]\n     nfev: 20\n     njev: 10\n\n\nscipy.optimize.minimize is great. It may also seem a bit magical. How could you write a function that can find the minimum of any mathematical function? There are a number of ways to do this, which we’ll explore in today’s lecture, eventually arriving at the important idea of gradient descent, which is the principle that scipy.optimize.minimize uses.\nIt turns out that under the hood, the fit method for LinearRegression models uses gradient descent. Gradient descent is also how much of machine learning works, including even advanced neural network models.\nIn Data 100, the gradient descent process will usually be invisible to us, hidden beneath an abstraction layer. However, to be good data scientists, it’s important that we know the underlying principles that optimization functions harness to find optimal parameters.\n\n\n13.2.1.3 Digging into Gradient Descent\nLooking at the function across this domain, it is clear that the function’s minimum value occurs around \\(\\theta = 5.3\\). Let’s pretend for a moment that we couldn’t see the full view of the cost function. How would we guess the value of \\(\\theta\\) that minimizes the function?\nIt turns out that the first derivative of the function can give us a clue. 
In the graph below, the function and its derivative are plotted, with points where the derivative is equal to 0 plotted in light green.\n\n\nCode\nimport plotly.graph_objects as go\n\ndef derivative_arbitrary(x):\n    return (4*x**3 - 45*x**2 + 160*x - 180)/10\n\nfig = go.Figure()\nroots = np.array([2.3927, 3.5309, 5.3263])\n\nfig.add_trace(go.Scatter(x = xs, y = arbitrary(xs), \n                         mode = \"lines\", name = \"f\"))\nfig.add_trace(go.Scatter(x = xs, y = derivative_arbitrary(xs), \n                         mode = \"lines\", name = \"df\", line = {\"dash\": \"dash\"}))\nfig.add_trace(go.Scatter(x = np.array(roots), y = 0*roots, \n                         mode = \"markers\", name = \"df = zero\", marker_size = 12))\nfig.update_layout(font_size = 20, yaxis_range=[-1, 3])\nfig.update_layout(autosize=False, width=800, height=600)\nfig.show()\n\n\n                                                \n\n\nIn the plots below, the line indicates the value of the derivative of each value of \\(\\theta\\). The derivative is negative where it is red and positive where it is green.\nSay we make a guess for the minimizing value of \\(\\theta\\). Remember that we read plots from left to right, and assume that our starting \\(\\theta\\) value is to the left of the optimal \\(\\hat{\\theta}\\). If the guess “undershoots” the true minimizing value – our guess for \\(\\theta\\) is lower than the value of the \\(\\hat{\\theta}\\) that minimizes the function – the derivative will be negative. This means that if we increase \\(\\theta\\) (move further to the right), then we can decrease our loss function further. If this guess “overshoots” the true minimizing value, the derivative will be positive, implying the converse.\n\n\n\n\n\n\n\n\n\nWe can use this pattern to help formulate our next guess for the optimal \\(\\hat{\\theta}\\). Consider the case where we’ve undershot \\(\\theta\\) by guessing too low of a value. 
We’ll want our next guess to be greater in value than our previous guess – that is, we want to shift our guess to the right. You can think of this as following the slope “downhill” to the function’s minimum value.\n\n\n\n\n\n\n\n\n\nIf we’ve overshot \\(\\hat{\\theta}\\) by guessing too high of a value, we’ll want our next guess to be lower in value – we want to shift our guess for \\(\\hat{\\theta}\\) to the left.\n\n\n\n\n\n\n\n\n\nIn other words, the derivative of the function at each point tells us the direction of our next guess.\n\nA negative slope means we want to step to the right, or move in the positive direction.\nA positive slope means we want to step to the left, or move in the negative direction.\n\n\n\n13.2.1.4 Algorithm Attempt 1\nArmed with this knowledge, let’s try to see if we can use the derivative to optimize the function.\nWe start by making some guess for the minimizing value of \\(x\\). Then, we look at the derivative of the function at this value of \\(x\\), and step downhill in the opposite direction. We can express our new rule as a recurrence relation:\n\\[x^{(t+1)} = x^{(t)} - \\frac{d}{dx} f(x^{(t)})\\]\nTranslating this statement into English: we obtain our next guess for the minimizing value of \\(x\\) at timestep \\(t+1\\) (\\(x^{(t+1)}\\)) by taking our last guess (\\(x^{(t)}\\)) and subtracting the derivative of the function at that point (\\(\\frac{d}{dx} f(x^{(t)})\\)).\nA few steps are shown below, where the old step is shown as a transparent point, and the next step taken is the green-filled dot.\n\n\n\n\n\n\n\n\n\nLooking pretty good! We do have a problem though – once we arrive close to the minimum value of the function, our guesses “bounce” back and forth past the minimum without ever reaching it.\n\n\n\n\n\n\n\n\n\nIn other words, each step we take when updating our guess moves us too far. 
We can address this by decreasing the size of each step.\n\n\n13.2.1.5 Algorithm Attempt 2\nLet’s update our algorithm to use a learning rate (also sometimes called the step size), which controls how far we move with each update. We represent the learning rate with \\(\\alpha\\).\n\\[x^{(t+1)} = x^{(t)} - \\alpha \\frac{d}{dx} f(x^{(t)})\\]\nA small \\(\\alpha\\) means that we will take small steps; a large \\(\\alpha\\) means we will take large steps. When do we stop updating? We stop updating either after a fixed number of updates or after a subsequent update doesn’t change much.\nUpdating our function to use \\(\\alpha=0.3\\), our algorithm successfully converges (settles on a solution and stops updating significantly, or at all) on the minimum value.\n\n\n\n\n\n\n\n\n\n\n\n\n13.2.2 Convexity\nIn our analysis above, we focused our attention on the global minimum of the loss function. You may be wondering: what about the local minimum that’s just to the left?\nIf we had chosen a different starting guess for \\(\\theta\\), or a different value for the learning rate \\(\\alpha\\), our algorithm may have gotten “stuck” and converged on the local minimum, rather than on the true optimum value of loss.\n\n\n\n\n\n\n\n\n\nIf the loss function is convex, gradient descent is guaranteed to converge and find the global minimum of the objective function. Formally, a function \\(f\\) is convex if: \\[tf(a) + (1-t)f(b) \\geq f(ta + (1-t)b)\\] for all \\(a, b\\) in the domain of \\(f\\) and \\(t \\in [0, 1]\\).\nTo put this into words: if you drew a line between any two points on the curve, all values on the curve must be on or below the line. Importantly, any local minimum of a convex function is also its global minimum so we avoid the situation where the algorithm converges on some critical point that is not the minimum of the function.\n\n\n\n\n\n\n\n\n\nIn summary, non-convex loss functions can cause problems with optimization. 
This means that our choice of loss function is a key factor in our modeling process. It turns out that MSE is convex, which is a major reason why it is such a popular choice of loss function. Gradient descent is only guaranteed to converge (given enough iterations and an appropriate step size) for convex functions.\n\n\n13.2.3 Gradient Descent in 1 Dimension\n\nTerminology clarification: In past lectures, we have used “loss” to refer to the error incurred on a single datapoint. In applications, we usually care more about the average error across all datapoints. Going forward, we will take the “model’s loss” to mean the model’s average error across the dataset. This is sometimes also known as the empirical risk (R), cost function, or objective function. \\[L(\\theta) = R(\\theta) = \\frac{1}{n} \\sum_{i=1}^{n} L(y, \\hat{y})\\]\n\nIn our discussion above, we worked with some arbitrary function \\(f\\). As data scientists, we will almost always work with gradient descent in the context of optimizing models – specifically, we want to apply gradient descent to find the minimum of a loss function. In a modeling context, our goal is to minimize a loss function by choosing the minimizing model parameters.\nRecall our modeling workflow from the past few lectures:\n\nDefine a model with some parameters \\(\\theta_i\\)\nChoose a loss function\nSelect the values of \\(\\theta_i\\) that minimize the loss function on the data\n\nGradient descent is a powerful technique for completing this last task. 
By applying the gradient descent algorithm, we can select values for our parameters \\(\\theta_i\\) that will lead to the model having minimal loss on the training data.\nWhen using gradient descent in a modeling context, we:\n\nMake guesses for the minimizing \\(\\theta_i\\)\nCompute the derivative of the loss function \\(L\\)\n\nWe can “translate” our gradient descent rule from before by replacing \\(x\\) with \\(\\theta\\) and \\(f\\) with \\(L\\):\n\\[\\theta^{(t+1)} = \\theta^{(t)} - \\alpha \\frac{d}{d\\theta} L(\\theta^{(t)})\\]\n\n13.2.3.1 Gradient Descent on the tips Dataset\nTo see this in action, let’s consider a case where we have a linear model with no offset. We want to predict the tip (y) given the price of a meal (x). To do this, we\n\nChoose a model: \\(\\hat{y} = \\theta_1 x\\),\nChoose a loss function: \\(L(\\theta) = MSE(\\theta) = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\theta_1x_i)^2\\).\n\nLet’s apply our gradient_descent function from before to optimize our model on the tips dataset. We will try to select the best parameter \\(\\theta_i\\) to predict the tip \\(y\\) from the total_bill \\(x\\).\n\ndf = sns.load_dataset(\"tips\")\ndf.head()\n\n\n\n\n\n\n\n\ntotal_bill\ntip\nsex\nsmoker\nday\ntime\nsize\n\n\n\n\n0\n16.99\n1.01\nFemale\nNo\nSun\nDinner\n2\n\n\n1\n10.34\n1.66\nMale\nNo\nSun\nDinner\n3\n\n\n2\n21.01\n3.50\nMale\nNo\nSun\nDinner\n3\n\n\n3\n23.68\n3.31\nMale\nNo\nSun\nDinner\n2\n\n\n4\n24.59\n3.61\nFemale\nNo\nSun\nDinner\n4\n\n\n\n\n\n\n\nWe can visualize the value of the MSE on our dataset for different possible choices of \\(\\theta_1\\). 
To optimize our model, we want to select the value of \\(\\theta_1\\) that leads to the lowest MSE.\nTo apply gradient descent, we need to compute the derivative of the loss function with respect to our parameter \\(\\theta_1\\).\n\nGiven our loss function, \\[L(\\theta) = MSE(\\theta) = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\theta_1x_i)^2\\]\nWe take the derivative with respect to \\(\\theta_1\\) \\[\\frac{\\partial}{\\partial \\theta_{1}} L(\\theta_1^{(t)}) = \\frac{-2}{n} \\sum_{i=1}^n (y_i - \\theta_1^{(t)} x_i) x_i\\]\nWhich results in the gradient descent update rule \\[\\theta_1^{(t+1)} = \\theta_1^{(t)} - \\alpha \\frac{d}{d\\theta}L(\\theta_1^{(t)})\\]\n\nfor some learning rate \\(\\alpha\\).\nImplementing this in code, we can visualize the MSE loss on the tips data. MSE is convex, so there is one global minimum.\n\n\nCode\ndef gradient_descent(df, initial_guess, alpha, n):\n    \"\"\"Performs n steps of gradient descent on df using learning rate alpha starting\n       from initial_guess. 
Returns a numpy array of all guesses over time.\"\"\"\n    guesses = [initial_guess]\n    current_guess = initial_guess\n    while len(guesses) &lt; n:\n        current_guess = current_guess - alpha * df(current_guess)\n        guesses.append(current_guess)\n        \n    return np.array(guesses)\n\ndef mse_single_arg(theta_1):\n    \"\"\"Returns the MSE on our data for the given theta1\"\"\"\n    x = df[\"total_bill\"]\n    y_obs = df[\"tip\"]\n    y_hat = theta_1 * x\n    return np.mean((y_hat - y_obs) ** 2)\n\ndef mse_loss_derivative_single_arg(theta_1):\n    \"\"\"Returns the derivative of the MSE on our data for the given theta1\"\"\"\n    x = df[\"total_bill\"]\n    y_obs = df[\"tip\"]\n    y_hat = theta_1 * x\n    \n    return np.mean(2 * (y_hat - y_obs) * x)\n\nloss_df = pd.DataFrame({\"theta_1\":np.linspace(-1.5, 1), \"MSE\":[mse_single_arg(theta_1) for theta_1 in np.linspace(-1.5, 1)]})\n\ntrajectory = gradient_descent(mse_loss_derivative_single_arg, -0.5, 0.0001, 100)\n\nplt.plot(loss_df[\"theta_1\"], loss_df[\"MSE\"])\nplt.scatter(trajectory, [mse_single_arg(guess) for guess in trajectory], c=\"white\", edgecolor=\"firebrick\")\nplt.scatter(trajectory[-1], mse_single_arg(trajectory[-1]), c=\"firebrick\")\nplt.xlabel(r\"$\\theta_1$\")\nplt.ylabel(r\"$L(\\theta_1)$\");\n\nprint(f\"Final guess for theta_1: {trajectory[-1]}\")\n\n\nFinal guess for theta_1: 0.14369554654231262",
+    "section": "13.3 Gradient Descent",
+    "text": "13.3 Gradient Descent\nAt this point, we’ve grown quite familiar with the process of choosing a model and a corresponding loss function and optimizing parameters by choosing the values of \\(\\theta\\) that minimize the loss function. So far, we’ve optimized \\(\\theta\\) by\n\nUsing calculus to take the derivative of the loss function with respect to \\(\\theta\\), setting it equal to 0, and solving for \\(\\theta\\).\nUsing the geometric argument of orthogonality to derive the OLS solution \\(\\hat{\\theta} = (\\mathbb{X}^T \\mathbb{X})^{-1}\\mathbb{X}^T \\mathbb{Y}\\).\n\nOne thing to note, however, is that the techniques we used above can only be applied if we make some big assumptions. For the calculus approach, we assumed that the loss function was differentiable at all points and that we could algebraically solve for the zero points of the derivative; for the geometric approach, OLS only applies when using a linear model with MSE loss. What happens when we have more complex models with different, more complex loss functions? The techniques we’ve learned so far will not work, so we need a new optimization technique: gradient descent.\n\nBIG IDEA: use an iterative algorithm to numerically compute the minimum of the loss.\n\n\n13.3.1 Minimizing an Arbitrary 1D Function\nLet’s consider an arbitrary function. Our goal is to find the value of \\(x\\) that minimizes this function.\n\ndef arbitrary(x):\n    return (x**4 - 15*x**3 + 80*x**2 - 180*x + 144)/10\n\n\n\n13.3.1.1 The Naive Approach: Guess and Check\nAbove, we saw that the minimum is somewhere around 5.3. Let’s see if we can figure out how to find the exact minimum algorithmically from scratch. 
One very slow (and terrible) way would be manual guess-and-check.\n\narbitrary(6)\n\n0.0\n\n\nA somewhat better (but still slow) approach is to use brute force to try out a bunch of x values and return the one that yields the lowest loss.\n\ndef simple_minimize(f, xs):\n    # Takes in a function f and a set of values xs. \n    # Calculates the value of the function f at all values x in xs\n    # Takes the minimum value of f(x) and returns the corresponding value x \n    y = [f(x) for x in xs]  \n    return xs[np.argmin(y)]\n\nguesses = [5.3, 5.31, 5.32, 5.33, 5.34, 5.35]\nsimple_minimize(arbitrary, guesses)\n\n5.33\n\n\nThis process is essentially the same as before where we made a graphical plot, it’s just that we’re only looking at 20 selected points.\n\n\nCode\nxs = np.linspace(1, 7, 200)\nsparse_xs = np.linspace(1, 7, 5)\n\nys = arbitrary(xs)\nsparse_ys = arbitrary(sparse_xs)\n\nfig = px.line(x = xs, y = arbitrary(xs))\nfig.add_scatter(x = sparse_xs, y = arbitrary(sparse_xs), mode = \"markers\")\nfig.update_layout(showlegend= False)\nfig.update_layout(autosize=False, width=800, height=600)\nfig.show()\n\n\n                                                \n\n\nThis basic approach suffers from three major flaws:\n\nIf the minimum is outside our range of guesses, the answer will be completely wrong.\nEven if our range of guesses is correct, if the guesses are too coarse, our answer will be inaccurate.\nIt is very computationally inefficient, considering potentially vast numbers of guesses that are useless.\n\n\n\n13.3.1.2 Scipy.optimize.minimize\nOne way to minimize this mathematical function is to use the scipy.optimize.minimize function. 
It takes a function and a starting guess and tries to find the minimum.\n\nfrom scipy.optimize import minimize\n\n# takes a function f and a starting point x0 and returns a readout \n# with the optimal input value of x which minimizes f\nminimize(arbitrary, x0 = 3.5)\n\n  message: Optimization terminated successfully.\n  success: True\n   status: 0\n      fun: -0.13827491292966557\n        x: [ 2.393e+00]\n      nit: 3\n      jac: [ 6.486e-06]\n hess_inv: [[ 7.385e-01]]\n     nfev: 20\n     njev: 10\n\n\nscipy.optimize.minimize is great. It may also seem a bit magical. How could you write a function that can find the minimum of any mathematical function? There are a number of ways to do this, which we’ll explore in today’s lecture, eventually arriving at the important idea of gradient descent, which is the principle that scipy.optimize.minimize uses.\nIt turns out that under the hood, the fit method for LinearRegression models uses gradient descent. Gradient descent is also how much of machine learning works, including even advanced neural network models.\nIn Data 100, the gradient descent process will usually be invisible to us, hidden beneath an abstraction layer. However, to be good data scientists, it’s important that we know the underlying principles that optimization functions harness to find optimal parameters.\n\n\n13.3.1.3 Digging into Gradient Descent\nLooking at the function across this domain, it is clear that the function’s minimum value occurs around \\(\\theta = 5.3\\). Let’s pretend for a moment that we couldn’t see the full view of the cost function. How would we guess the value of \\(\\theta\\) that minimizes the function?\nIt turns out that the first derivative of the function can give us a clue. 
In the graph below, the function and its derivative are plotted, with points where the derivative is equal to 0 plotted in light green.\n\n\nCode\nimport plotly.graph_objects as go\n\ndef derivative_arbitrary(x):\n    return (4*x**3 - 45*x**2 + 160*x - 180)/10\n\nfig = go.Figure()\nroots = np.array([2.3927, 3.5309, 5.3263])\n\nfig.add_trace(go.Scatter(x = xs, y = arbitrary(xs), \n                         mode = \"lines\", name = \"f\"))\nfig.add_trace(go.Scatter(x = xs, y = derivative_arbitrary(xs), \n                         mode = \"lines\", name = \"df\", line = {\"dash\": \"dash\"}))\nfig.add_trace(go.Scatter(x = np.array(roots), y = 0*roots, \n                         mode = \"markers\", name = \"df = zero\", marker_size = 12))\nfig.update_layout(font_size = 20, yaxis_range=[-1, 3])\nfig.update_layout(autosize=False, width=800, height=600)\nfig.show()\n\n\n                                                \n\n\nIn the plots below, the line indicates the value of the derivative of each value of \\(\\theta\\). The derivative is negative where it is red and positive where it is green.\nSay we make a guess for the minimizing value of \\(\\theta\\). Remember that we read plots from left to right, and assume that our starting \\(\\theta\\) value is to the left of the optimal \\(\\hat{\\theta}\\). If the guess “undershoots” the true minimizing value – our guess for \\(\\theta\\) is lower than the value of the \\(\\hat{\\theta}\\) that minimizes the function – the derivative will be negative. This means that if we increase \\(\\theta\\) (move further to the right), then we can decrease our loss function further. If this guess “overshoots” the true minimizing value, the derivative will be positive, implying the converse.\n\n\n\n\n\n\n\n\n\nWe can use this pattern to help formulate our next guess for the optimal \\(\\hat{\\theta}\\). Consider the case where we’ve undershot \\(\\theta\\) by guessing too low of a value. 
We’ll want our next guess to be greater in value than our previous guess – that is, we want to shift our guess to the right. You can think of this as following the slope “downhill” to the function’s minimum value.\n\n\n\n\n\n\n\n\n\nIf we’ve overshot \\(\\hat{\\theta}\\) by guessing too high of a value, we’ll want our next guess to be lower in value – we want to shift our guess for \\(\\hat{\\theta}\\) to the left.\n\n\n\n\n\n\n\n\n\nIn other words, the derivative of the function at each point tells us the direction of our next guess.\n\nA negative slope means we want to step to the right, or move in the positive direction.\nA positive slope means we want to step to the left, or move in the negative direction.\n\n\n\n13.3.1.4 Algorithm Attempt 1\nArmed with this knowledge, let’s try to see if we can use the derivative to optimize the function.\nWe start by making some guess for the minimizing value of \\(x\\). Then, we look at the derivative of the function at this value of \\(x\\), and step downhill in the opposite direction. We can express our new rule as a recurrence relation:\n\\[x^{(t+1)} = x^{(t)} - \\frac{d}{dx} f(x^{(t)})\\]\nTranslating this statement into English: we obtain our next guess for the minimizing value of \\(x\\) at timestep \\(t+1\\) (\\(x^{(t+1)}\\)) by taking our last guess (\\(x^{(t)}\\)) and subtracting the derivative of the function at that point (\\(\\frac{d}{dx} f(x^{(t)})\\)).\nA few steps are shown below, where the old step is shown as a transparent point, and the next step taken is the green-filled dot.\n\n\n\n\n\n\n\n\n\nLooking pretty good! We do have a problem though – once we arrive close to the minimum value of the function, our guesses “bounce” back and forth past the minimum without ever reaching it.\n\n\n\n\n\n\n\n\n\nIn other words, each step we take when updating our guess moves us too far. 
We can address this by decreasing the size of each step.\n\n\n13.3.1.5 Algorithm Attempt 2\nLet’s update our algorithm to use a learning rate (also sometimes called the step size), which controls how far we move with each update. We represent the learning rate with \\(\\alpha\\).\n\\[x^{(t+1)} = x^{(t)} - \\alpha \\frac{d}{dx} f(x^{(t)})\\]\nA small \\(\\alpha\\) means that we will take small steps; a large \\(\\alpha\\) means we will take large steps. When do we stop updating? We stop updating either after a fixed number of updates or after a subsequent update doesn’t change much.\nUpdating our function to use \\(\\alpha=0.3\\), our algorithm successfully converges (settles on a solution and stops updating significantly, or at all) on the minimum value.\n\n\n\n\n\n\n\n\n\n\n\n\n13.3.2 Convexity\nIn our analysis above, we focused our attention on the global minimum of the loss function. You may be wondering: what about the local minimum that’s just to the left?\nIf we had chosen a different starting guess for \\(\\theta\\), or a different value for the learning rate \\(\\alpha\\), our algorithm may have gotten “stuck” and converged on the local minimum, rather than on the true optimum value of loss.\n\n\n\n\n\n\n\n\n\nIf the loss function is convex, gradient descent is guaranteed to converge and find the global minimum of the objective function. Formally, a function \\(f\\) is convex if: \\[tf(a) + (1-t)f(b) \\geq f(ta + (1-t)b)\\] for all \\(a, b\\) in the domain of \\(f\\) and \\(t \\in [0, 1]\\).\nTo put this into words: if you drew a line between any two points on the curve, all values on the curve must be on or below the line. Importantly, any local minimum of a convex function is also its global minimum so we avoid the situation where the algorithm converges on some critical point that is not the minimum of the function.\n\n\n\n\n\n\n\n\n\nIn summary, non-convex loss functions can cause problems with optimization. 
This means that our choice of loss function is a key factor in our modeling process. It turns out that MSE is convex, which is a major reason why it is such a popular choice of loss function. Gradient descent is only guaranteed to converge (given enough iterations and an appropriate step size) for convex functions.\n\n\n13.3.3 Gradient Descent in 1 Dimension\n\nTerminology clarification: In past lectures, we have used “loss” to refer to the error incurred on a single datapoint. In applications, we usually care more about the average error across all datapoints. Going forward, we will take the “model’s loss” to mean the model’s average error across the dataset. This is sometimes also known as the empirical risk (R), cost function, or objective function. \\[L(\\theta) = R(\\theta) = \\frac{1}{n} \\sum_{i=1}^{n} L(y, \\hat{y})\\]\n\nIn our discussion above, we worked with some arbitrary function \\(f\\). As data scientists, we will almost always work with gradient descent in the context of optimizing models – specifically, we want to apply gradient descent to find the minimum of a loss function. In a modeling context, our goal is to minimize a loss function by choosing the minimizing model parameters.\nRecall our modeling workflow from the past few lectures:\n\nDefine a model with some parameters \\(\\theta_i\\)\nChoose a loss function\nSelect the values of \\(\\theta_i\\) that minimize the loss function on the data\n\nGradient descent is a powerful technique for completing this last task. 
By applying the gradient descent algorithm, we can select values for our parameters \\(\\theta_i\\) that will lead to the model having minimal loss on the training data.\nWhen using gradient descent in a modeling context, we:\n\nMake guesses for the minimizing \\(\\theta_i\\)\nCompute the derivative of the loss function \\(L\\)\n\nWe can “translate” our gradient descent rule from before by replacing \\(x\\) with \\(\\theta\\) and \\(f\\) with \\(L\\):\n\\[\\theta^{(t+1)} = \\theta^{(t)} - \\alpha \\frac{d}{d\\theta} L(\\theta^{(t)})\\]\n\n13.3.3.1 Gradient Descent on the tips Dataset\nTo see this in action, let’s consider a case where we have a linear model with no offset. We want to predict the tip (y) given the price of a meal (x). To do this, we\n\nChoose a model: \\(\\hat{y} = \\theta_1 x\\),\nChoose a loss function: \\(L(\\theta) = MSE(\\theta) = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\theta_1x_i)^2\\).\n\nLet’s apply our gradient_descent function from before to optimize our model on the tips dataset. We will try to select the best parameter \\(\\theta_i\\) to predict the tip \\(y\\) from the total_bill \\(x\\).\n\ndf = sns.load_dataset(\"tips\")\ndf.head()\n\n\n\n\n\n\n\n\ntotal_bill\ntip\nsex\nsmoker\nday\ntime\nsize\n\n\n\n\n0\n16.99\n1.01\nFemale\nNo\nSun\nDinner\n2\n\n\n1\n10.34\n1.66\nMale\nNo\nSun\nDinner\n3\n\n\n2\n21.01\n3.50\nMale\nNo\nSun\nDinner\n3\n\n\n3\n23.68\n3.31\nMale\nNo\nSun\nDinner\n2\n\n\n4\n24.59\n3.61\nFemale\nNo\nSun\nDinner\n4\n\n\n\n\n\n\n\nWe can visualize the value of the MSE on our dataset for different possible choices of \\(\\theta_1\\). 
To optimize our model, we want to select the value of \\(\\theta_1\\) that leads to the lowest MSE.\nTo apply gradient descent, we need to compute the derivative of the loss function with respect to our parameter \\(\\theta_1\\).\n\nGiven our loss function, \\[L(\\theta) = MSE(\\theta) = \\frac{1}{n} \\sum_{i=1}^n (y_i - \\theta_1x_i)^2\\]\nWe take the derivative with respect to \\(\\theta_1\\) \\[\\frac{\\partial}{\\partial \\theta_{1}} L(\\theta_1^{(t)}) = \\frac{-2}{n} \\sum_{i=1}^n (y_i - \\theta_1^{(t)} x_i) x_i\\]\nWhich results in the gradient descent update rule \\[\\theta_1^{(t+1)} = \\theta_1^{(t)} - \\alpha \\frac{d}{d\\theta}L(\\theta_1^{(t)})\\]\n\nfor some learning rate \\(\\alpha\\).\nImplementing this in code, we can visualize the MSE loss on the tips data. MSE is convex, so there is one global minimum.\n\n\nCode\ndef gradient_descent(df, initial_guess, alpha, n):\n    \"\"\"Performs n steps of gradient descent on df using learning rate alpha starting\n       from initial_guess. 
Returns a numpy array of all guesses over time.\"\"\"\n    guesses = [initial_guess]\n    current_guess = initial_guess\n    while len(guesses) &lt; n:\n        current_guess = current_guess - alpha * df(current_guess)\n        guesses.append(current_guess)\n        \n    return np.array(guesses)\n\ndef mse_single_arg(theta_1):\n    \"\"\"Returns the MSE on our data for the given theta1\"\"\"\n    x = df[\"total_bill\"]\n    y_obs = df[\"tip\"]\n    y_hat = theta_1 * x\n    return np.mean((y_hat - y_obs) ** 2)\n\ndef mse_loss_derivative_single_arg(theta_1):\n    \"\"\"Returns the derivative of the MSE on our data for the given theta1\"\"\"\n    x = df[\"total_bill\"]\n    y_obs = df[\"tip\"]\n    y_hat = theta_1 * x\n    \n    return np.mean(2 * (y_hat - y_obs) * x)\n\nloss_df = pd.DataFrame({\"theta_1\":np.linspace(-1.5, 1), \"MSE\":[mse_single_arg(theta_1) for theta_1 in np.linspace(-1.5, 1)]})\n\ntrajectory = gradient_descent(mse_loss_derivative_single_arg, -0.5, 0.0001, 100)\n\nplt.plot(loss_df[\"theta_1\"], loss_df[\"MSE\"])\nplt.scatter(trajectory, [mse_single_arg(guess) for guess in trajectory], c=\"white\", edgecolor=\"firebrick\")\nplt.scatter(trajectory[-1], mse_single_arg(trajectory[-1]), c=\"firebrick\")\nplt.xlabel(r\"$\\theta_1$\")\nplt.ylabel(r\"$L(\\theta_1)$\");\n\nprint(f\"Final guess for theta_1: {trajectory[-1]}\")\n\n\nFinal guess for theta_1: 0.14369554654231262",
     "crumbs": [
       "<span class='chapter-number'>13</span>  <span class='chapter-title'>sklearn and Gradient Descent</span>"
     ]
diff --git a/docs/visualization_1/visualization_1.html b/docs/visualization_1/visualization_1.html
index 19e5a617..da69869c 100644
--- a/docs/visualization_1/visualization_1.html
+++ b/docs/visualization_1/visualization_1.html
@@ -443,7 +443,7 @@ <h2 data-number="7.4" class="anchored" data-anchor-id="variable-types-should-inf
 <h2 data-number="7.5" class="anchored" data-anchor-id="qualitative-variables-bar-plots"><span class="header-section-number">7.5</span> Qualitative Variables: Bar Plots</h2>
 <p>A <strong>bar plot</strong> is one of the most common ways of displaying the <strong>distribution</strong> of a <strong>qualitative</strong> (categorical) variable. The length of a bar plot encodes the frequency of a category; the width encodes no useful information. The color <em>could</em> indicate a sub-category, but this is not necessarily the case.</p>
 <p>Let’s contextualize this in an example. We will use the World Bank dataset (<code>wb</code>) in our analysis.</p>
-<div id="100b2f6f" class="cell" data-execution_count="1">
+<div id="31bd5427" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -617,7 +617,7 @@ <h2 data-number="7.5" class="anchored" data-anchor-id="qualitative-variables-bar
 <p>We can visualize the distribution of the <code>Continent</code> column using a bar plot. There are a few ways to do this.</p>
 <section id="plotting-in-pandas" class="level3" data-number="7.5.1">
 <h3 data-number="7.5.1" class="anchored" data-anchor-id="plotting-in-pandas"><span class="header-section-number">7.5.1</span> Plotting in Pandas</h3>
-<div id="1e67a83d" class="cell" data-execution_count="2">
+<div id="f13cf7b6" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>wb[<span class="st">'Continent'</span>].value_counts().plot(kind<span class="op">=</span><span class="st">'bar'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -632,7 +632,7 @@ <h3 data-number="7.5.1" class="anchored" data-anchor-id="plotting-in-pandas"><sp
 </section>
 <section id="plotting-in-matplotlib" class="level3" data-number="7.5.2">
 <h3 data-number="7.5.2" class="anchored" data-anchor-id="plotting-in-matplotlib"><span class="header-section-number">7.5.2</span> Plotting in Matplotlib</h3>
-<div id="6e6f53c7" class="cell" data-execution_count="3">
+<div id="62e18bf8" class="cell" data-execution_count="3">
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt <span class="co"># matplotlib is typically given the alias plt</span></span>
 <span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>continent <span class="op">=</span> wb[<span class="st">'Continent'</span>].value_counts()</span>
@@ -652,7 +652,7 @@ <h3 data-number="7.5.2" class="anchored" data-anchor-id="plotting-in-matplotlib"
 </section>
 <section id="plotting-in-seaborn" class="level3" data-number="7.5.3">
 <h3 data-number="7.5.3" class="anchored" data-anchor-id="plotting-in-seaborn"><span class="header-section-number">7.5.3</span> Plotting in <code>Seaborn</code></h3>
-<div id="8badb667" class="cell" data-execution_count="4">
+<div id="981b1255" class="cell" data-execution_count="4">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns <span class="co"># seaborn is typically given the alias sns</span></span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>sns.countplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">'Continent'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
@@ -675,7 +675,7 @@ <h3 data-number="7.5.3" class="anchored" data-anchor-id="plotting-in-seaborn"><s
 <section id="distributions-of-quantitative-variables" class="level2" data-number="7.6">
 <h2 data-number="7.6" class="anchored" data-anchor-id="distributions-of-quantitative-variables"><span class="header-section-number">7.6</span> Distributions of Quantitative Variables</h2>
 <p>Revisiting our example with the <code>wb</code> DataFrame, let’s plot the distribution of <code>Gross national income per capita</code>.</p>
-<div id="ab0af5b8" class="cell" data-execution_count="5">
+<div id="a9916753" class="cell" data-execution_count="5">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>wb.head(<span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
@@ -841,7 +841,7 @@ <h2 data-number="7.6" class="anchored" data-anchor-id="distributions-of-quantita
 </div>
 <p>How should we define our categories for this variable? In the previous example, these were a few unique values of the <code>Continent</code> column. If we use similar logic here, our categories are the different numerical values contained in the <code>Gross national income per capita</code> column.</p>
 <p>Under this assumption, let’s plot this distribution using the <code>seaborn.countplot</code> function.</p>
-<div id="3d5f31e8" class="cell" data-execution_count="6">
+<div id="7f56a682" class="cell" data-execution_count="6">
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>sns.countplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">'Gross national income per capita, Atlas method: $: 2016'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -863,7 +863,7 @@ <h2 data-number="7.6" class="anchored" data-anchor-id="distributions-of-quantita
 <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-plots"><span class="header-section-number">7.6.1</span> Box Plots and Violin Plots</h3>
 <p>Box plots and violin plots are two very similar kinds of visualizations. Both display the distribution of a variable using information about <strong>quartiles</strong>.</p>
 <p>In a box plot, the width of the box at any point does not encode meaning. In a violin plot, the width of the plot indicates the density of the distribution at each possible value.</p>
-<div id="09f0b167" class="cell" data-execution_count="7">
+<div id="b58dca79" class="cell" data-execution_count="7">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>sns.boxplot(data<span class="op">=</span>wb, y<span class="op">=</span><span class="st">'Gross national income per capita, Atlas method: $: 2016'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -873,7 +873,7 @@ <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-pl
 </div>
 </div>
 </div>
-<div id="515b1d2e" class="cell" data-execution_count="8">
+<div id="cb17cdf4" class="cell" data-execution_count="8">
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>sns.violinplot(data<span class="op">=</span>wb, y<span class="op">=</span><span class="st">"Gross national income per capita, Atlas method: $: 2016"</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -890,7 +890,7 @@ <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-pl
 <li>The third quartile (Q3) represents the 75th percentile – 75% of the data is smaller than or equal to the third quartile.</li>
 </ul>
 <p>This means that the middle 50% of the data lies between the first and third quartiles. This is demonstrated in the histogram below. The three quartiles are marked with red vertical bars.</p>
-<div id="6a80f190" class="cell" data-execution_count="9">
+<div id="eea20e3a" class="cell" data-execution_count="9">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>gdp <span class="op">=</span> wb[<span class="st">'Gross domestic product: </span><span class="sc">% g</span><span class="st">rowth : 2016'</span>]</span>
@@ -929,7 +929,7 @@ <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-pl
 </div>
 </div>
 <p>In a box plot, the lower extent of the box lies at Q1, while the upper extent of the box lies at Q3. The horizontal line in the middle of the box corresponds to Q2 (equivalently, the median).</p>
-<div id="736d7fdc" class="cell" data-execution_count="10">
+<div id="c109675a" class="cell" data-execution_count="10">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>sns.boxplot(data<span class="op">=</span>wb, y<span class="op">=</span><span class="st">'Gross domestic product: </span><span class="sc">% g</span><span class="st">rowth : 2016'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -945,7 +945,7 @@ <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-pl
 <img src="images/box_plot_diagram.png" width="600">
 </center>
 <p>A violin plot displays quartile information, albeit a bit more subtly through smoothed density curves. Look closely at the center vertical bar of the violin plot below; the three quartiles and “whiskers” are still present!</p>
-<div id="ee3ad196" class="cell" data-execution_count="11">
+<div id="d4b12dcb" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>sns.violinplot(data<span class="op">=</span>wb, y<span class="op">=</span><span class="st">'Gross domestic product: </span><span class="sc">% g</span><span class="st">rowth : 2016'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -960,7 +960,7 @@ <h3 data-number="7.6.1" class="anchored" data-anchor-id="box-plots-and-violin-pl
 <h3 data-number="7.6.2" class="anchored" data-anchor-id="side-by-side-box-and-violin-plots"><span class="header-section-number">7.6.2</span> Side-by-Side Box and Violin Plots</h3>
 <p>Plotting side-by-side box or violin plots allows us to compare distributions across different categories. In other words, they enable us to plot both a qualitative variable and a quantitative continuous variable in one visualization.</p>
 <p>With <code>seaborn</code>, we can easily create side-by-side plots by specifying both an x and y column.</p>
-<div id="73ad2017" class="cell" data-execution_count="12">
+<div id="1bbeaf27" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>sns.boxplot(data<span class="op">=</span>wb, x<span class="op">=</span><span class="st">"Continent"</span>, y<span class="op">=</span><span class="st">'Gross domestic product: </span><span class="sc">% g</span><span class="st">rowth : 2016'</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
 <div>
@@ -977,7 +977,7 @@ <h3 data-number="7.6.3" class="anchored" data-anchor-id="histograms"><span class
 <section id="plotting-histograms" class="level4" data-number="7.6.3.1">
 <h4 data-number="7.6.3.1" class="anchored" data-anchor-id="plotting-histograms"><span class="header-section-number">7.6.3.1</span> Plotting Histograms</h4>
 <p>Below, we plot a histogram using matplotlib and seaborn. Which graph do you prefer?</p>
-<div id="611b59e0" class="cell" data-execution_count="13">
+<div id="f429e7c4" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The `edgecolor` argument controls the color of the bin edges</span></span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>gni <span class="op">=</span> wb[<span class="st">"Gross national income per capita, Atlas method: $: 2016"</span>]</span>
 <span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>plt.hist(gni, density<span class="op">=</span><span class="va">True</span>, edgecolor<span class="op">=</span><span class="st">"white"</span>)</span>
@@ -994,7 +994,7 @@ <h4 data-number="7.6.3.1" class="anchored" data-anchor-id="plotting-histograms">
 </div>
 </div>
 </div>
-<div id="0336bbe9" class="cell" data-execution_count="14">
+<div id="fd4f965e" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>sns.histplot(data<span class="op">=</span>wb, x<span class="op">=</span><span class="st">"Gross national income per capita, Atlas method: $: 2016"</span>, stat<span class="op">=</span><span class="st">"density"</span>)</span>
 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">"Distribution of gross national income per capita"</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display">
@@ -1011,14 +1011,14 @@ <h4 data-number="7.6.3.2" class="anchored" data-anchor-id="overlaid-histograms">
 <p>We can overlay histograms (or density curves) to compare distributions across qualitative categories.</p>
 <p>The <code>hue</code> parameter of <code>sns.histplot</code> specifies the column that should be used to determine the color of each category. <code>hue</code> can be used in many <code>seaborn</code> plotting functions.</p>
 <p>Notice that the resulting plot includes a legend describing which color corresponds to each hemisphere – a legend should always be included if color is used to encode information in a visualization!</p>
-<div id="4f54864c" class="cell" data-execution_count="15">
+<div id="7cc3af20" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Create a new variable to store the hemisphere in which each country is located</span></span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>north <span class="op">=</span> [<span class="st">"Asia"</span>, <span class="st">"Europe"</span>, <span class="st">"N. America"</span>]</span>
 <span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>south <span class="op">=</span> [<span class="st">"Africa"</span>, <span class="st">"Oceania"</span>, <span class="st">"S. America"</span>]</span>
 <span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>wb.loc[wb[<span class="st">"Continent"</span>].isin(north), <span class="st">"Hemisphere"</span>] <span class="op">=</span> <span class="st">"Northern"</span></span>
 <span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>wb.loc[wb[<span class="st">"Continent"</span>].isin(south), <span class="st">"Hemisphere"</span>] <span class="op">=</span> <span class="st">"Southern"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
-<div id="a9f6a27a" class="cell" data-execution_count="16">
+<div id="f1b03984" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>sns.histplot(data<span class="op">=</span>wb, x<span class="op">=</span><span class="st">"Gross national income per capita, Atlas method: $: 2016"</span>, hue<span class="op">=</span><span class="st">"Hemisphere"</span>, stat<span class="op">=</span><span class="st">"density"</span>)</span>
 <span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">"Distribution of gross national income per capita"</span>)<span class="op">;</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-stderr">
@@ -1048,7 +1048,7 @@ <h4 data-number="7.6.3.2" class="anchored" data-anchor-id="overlaid-histograms">
 </div>
 </div>
 <p>Again, each bin of a histogram is scaled such that its <strong>area</strong> is proportional to the <strong>percentage</strong> of all datapoints that it contains.</p>
-<div id="288292ef" class="cell" data-execution_count="17">
+<div id="4bffd41c" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>densities, bins, _ <span class="op">=</span> plt.hist(gni, density<span class="op">=</span><span class="va">True</span>, edgecolor<span class="op">=</span><span class="st">"white"</span>, bins<span class="op">=</span><span class="dv">5</span>)</span>
 <span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="st">"Gross national income per capita"</span>)</span>
 <span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>plt.ylabel(<span class="st">"Density"</span>)</span>
@@ -1089,7 +1089,7 @@ <h4 data-number="7.6.3.3" class="anchored" data-anchor-id="evaluating-histograms
 <section id="skewness-and-tails" class="level5" data-number="7.6.3.3.1">
 <h5 data-number="7.6.3.3.1" class="anchored" data-anchor-id="skewness-and-tails"><span class="header-section-number">7.6.3.3.1</span> Skewness and Tails</h5>
 <p>The skew of a histogram describes the direction in which its “tail” extends. - A distribution with a long right tail is <strong>skewed right</strong> (such as <code>Gross national income per capita</code>). In a right-skewed distribution, the few large outliers “pull” the mean to the <strong>right</strong> of the median.</p>
-<div id="321effb7" class="cell" data-execution_count="18">
+<div id="a66146d3" class="cell" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a>sns.histplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">'Gross national income per capita, Atlas method: $: 2016'</span>, stat <span class="op">=</span> <span class="st">'density'</span>)<span class="op">;</span></span>
 <span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Distribution with a long right tail'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="18">
@@ -1107,7 +1107,7 @@ <h5 data-number="7.6.3.3.1" class="anchored" data-anchor-id="skewness-and-tails"
 <li>A distribution with a long left tail is <strong>skewed left</strong> (such as <code>Access to an improved water source</code>). In a left-skewed distribution, the few small outliers “pull” the mean to the <strong>left</strong> of the median.</li>
 </ul>
 <p>In the case where a distribution has equal-sized right and left tails, it is <strong>symmetric</strong>. The mean is approximately <strong>equal</strong> to the median. Think of mean as the balancing point of the distribution.</p>
-<div id="75d7adb1" class="cell" data-execution_count="19">
+<div id="823d3a00" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>sns.histplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">'Access to an improved water source: </span><span class="sc">% o</span><span class="st">f population: 2015'</span>, stat <span class="op">=</span> <span class="st">'density'</span>)<span class="op">;</span></span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Distribution with a long left tail'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="cell-output cell-output-display" data-execution_count="19">
@@ -1130,7 +1130,7 @@ <h5 data-number="7.6.3.3.2" class="anchored" data-anchor-id="outliers"><span cla
 <h5 data-number="7.6.3.3.3" class="anchored" data-anchor-id="modes"><span class="header-section-number">7.6.3.3.3</span> Modes</h5>
 <p>In Data 100, we describe a “mode” of a histogram as a peak in the distribution. Often, however, it is difficult to determine what counts as its own “peak.” For example, the number of peaks in the distribution of HIV rates across different countries varies depending on the number of histogram bins we plot.</p>
 <p>If we set the number of bins to 5, the distribution appears unimodal.</p>
-<div id="64987db7" class="cell" data-execution_count="20">
+<div id="ea008373" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Rename the very long column name for convenience</span></span>
 <span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a>wb <span class="op">=</span> wb.rename(columns<span class="op">=</span>{<span class="st">'Antiretroviral therapy coverage: </span><span class="sc">% o</span><span class="st">f people living with HIV: 2015'</span>:<span class="st">"HIV rate"</span>})</span>
 <span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a><span class="co"># With 5 bins, it seems that there is only one peak</span></span>
@@ -1144,7 +1144,7 @@ <h5 data-number="7.6.3.3.3" class="anchored" data-anchor-id="modes"><span class=
 </div>
 </div>
 </div>
-<div id="f08e0eb4" class="cell" data-execution_count="21">
+<div id="96b1c83a" class="cell" data-execution_count="21">
 <div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="co"># With 10 bins, there seem to be two peaks</span></span>
 <span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a>sns.histplot(data<span class="op">=</span>wb, x<span class="op">=</span><span class="st">"HIV rate"</span>, stat<span class="op">=</span><span class="st">"density"</span>, bins<span class="op">=</span><span class="dv">10</span>)</span>
@@ -1157,7 +1157,7 @@ <h5 data-number="7.6.3.3.3" class="anchored" data-anchor-id="modes"><span class=
 </div>
 </div>
 </div>
-<div id="0b8b5ba0" class="cell" data-execution_count="22">
+<div id="b9826d6d" class="cell" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="co"># And with 20 bins, it becomes hard to say what counts as a "peak"!</span></span>
 <span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a>sns.histplot(data<span class="op">=</span>wb, x <span class="op">=</span><span class="st">"HIV rate"</span>, stat<span class="op">=</span><span class="st">"density"</span>, bins<span class="op">=</span><span class="dv">20</span>)</span>
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-10-output-2.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-10-output-2.pdf
index 2ae38dc7..ef847cf8 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-10-output-2.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-10-output-2.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-11-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-11-output-1.pdf
index 647c38cc..a49bf3b4 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-11-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-11-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-12-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-12-output-1.pdf
index 7540a406..78d2393c 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-12-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-12-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-13-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-13-output-1.pdf
index e12e02eb..716afd23 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-13-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-13-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-14-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-14-output-1.pdf
index 732cdb08..552f6ca8 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-14-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-14-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-15-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-15-output-1.pdf
index 36b3284f..daf5b5e4 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-15-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-15-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-17-output-2.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-17-output-2.pdf
index 254c1874..ad0d4106 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-17-output-2.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-17-output-2.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-18-output-2.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-18-output-2.pdf
index ffd56f5e..29665684 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-18-output-2.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-18-output-2.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-19-output-2.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-19-output-2.pdf
index 4791350a..bae110b6 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-19-output-2.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-19-output-2.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-20-output-2.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-20-output-2.pdf
index f443f05e..1bde0c84 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-20-output-2.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-20-output-2.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-21-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-21-output-1.pdf
index 72130d7b..f1ff56ef 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-21-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-21-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-22-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-22-output-1.pdf
index d84eed8e..accdc2df 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-22-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-22-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-23-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-23-output-1.pdf
index 80ba62da..f5ce207a 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-23-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-23-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-3-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-3-output-1.pdf
index f2cde7b9..536fda13 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-3-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-3-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-4-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-4-output-1.pdf
index 2c020392..b1b6d2f3 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-4-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-4-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-5-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-5-output-1.pdf
index 03ba9480..a521df06 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-5-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-5-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-7-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-7-output-1.pdf
index 72bb6039..4e47a5e2 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-7-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-7-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-8-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-8-output-1.pdf
index a9c12bf0..91115672 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-8-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-8-output-1.pdf differ
diff --git a/docs/visualization_1/visualization_1_files/figure-pdf/cell-9-output-1.pdf b/docs/visualization_1/visualization_1_files/figure-pdf/cell-9-output-1.pdf
index 6090c11e..b95c57f8 100644
Binary files a/docs/visualization_1/visualization_1_files/figure-pdf/cell-9-output-1.pdf and b/docs/visualization_1/visualization_1_files/figure-pdf/cell-9-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2.html b/docs/visualization_2/visualization_2.html
index ca98bc33..fe6903d0 100644
--- a/docs/visualization_2/visualization_2.html
+++ b/docs/visualization_2/visualization_2.html
@@ -391,7 +391,7 @@ <h3 data-number="8.1.1" class="anchored" data-anchor-id="kde-theory"><span class
 <p>A <strong>kernel density estimate (KDE)</strong> is a smooth, continuous curve that approximates the distribution of a dataset. It allows us to represent general trends in a distribution without focusing on the details, which is useful for analyzing the broad structure of a dataset.</p>
 <p>More formally, a KDE attempts to approximate the underlying <strong>probability distribution</strong> from which our dataset was drawn. You may have encountered the idea of a probability distribution in your other classes; if not, we’ll discuss it at length in the next lecture. For now, you can think of a probability distribution as a description of how likely it is for us to sample a particular value in our dataset.</p>
 <p>A KDE curve estimates the probability density function of a random variable. Consider the example below, where we have used <code>sns.displot</code> to plot both a histogram (containing the data points we actually collected) and a KDE curve (representing the <em>approximated</em> probability distribution from which this data was drawn) using data from the World Bank dataset (<code>wb</code>).</p>
-<div id="2c13d9c4" class="cell" data-execution_count="1">
+<div id="a52218a1" class="cell" data-execution_count="1">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
@@ -566,7 +566,7 @@ <h3 data-number="8.1.1" class="anchored" data-anchor-id="kde-theory"><span class
 </div>
 </div>
 </div>
-<div id="b486befa" class="cell" data-execution_count="2">
+<div id="a37e462f" class="cell" data-execution_count="2">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> seaborn <span class="im">as</span> sns</span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -595,7 +595,7 @@ <h3 data-number="8.1.2" class="anchored" data-anchor-id="constructing-a-kde"><sp
 </ol>
 <p>We’ll explain what a “kernel” is momentarily.</p>
 <p>To make things simpler, let’s construct a KDE for a small, artificially generated dataset of 5 datapoints: <span class="math inline">\([2.2, 2.8, 3.7, 5.3, 5.7]\)</span>. In the plot below, each vertical bar represents one data point.</p>
-<div id="dc6d8d67" class="cell" data-execution_count="3">
+<div id="3e474b73" class="cell" data-execution_count="3">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>data <span class="op">=</span> [<span class="fl">2.2</span>, <span class="fl">2.8</span>, <span class="fl">3.7</span>, <span class="fl">5.3</span>, <span class="fl">5.7</span>]</span>
@@ -616,7 +616,7 @@ <h3 data-number="8.1.2" class="anchored" data-anchor-id="constructing-a-kde"><sp
 </div>
 </div>
 <p>Our goal is to create the following KDE curve, which was generated automatically by <code>sns.kdeplot</code>.</p>
-<div id="0523a0f1" class="cell" data-execution_count="4">
+<div id="7b0bd53b" class="cell" data-execution_count="4">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="st">"Data"</span>)</span>
@@ -635,7 +635,7 @@ <h3 data-number="8.1.2" class="anchored" data-anchor-id="constructing-a-kde"><sp
 </div>
 </div>
 <p>Alternatively, we can use <code>sns.histplot</code>. You can also get a very similar result in a single call by requesting the KDE be added to the histogram, with <code>kde=True</code> and some extra keywords:</p>
-<div id="76bbe7ad" class="cell" data-execution_count="5">
+<div id="70bb62e2" class="cell" data-execution_count="5">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="st">"Data"</span>)</span>
@@ -658,7 +658,7 @@ <h4 data-number="8.1.2.1" class="anchored" data-anchor-id="step-1-place-a-kernel
 <p>A <strong>kernel</strong> is a density curve. It is the mathematical function that attempts to capture the randomness of each data point in our sampled data. To explain what this means, consider just <em>one</em> of the datapoints in our dataset: <span class="math inline">\(2.2\)</span>. We obtained this datapoint by randomly sampling some information out in the real world (you can imagine <span class="math inline">\(2.2\)</span> as representing a single measurement taken in an experiment, for example). If we were to sample a new datapoint, we may obtain a slightly different value. It could be higher than <span class="math inline">\(2.2\)</span>; it could also be lower than <span class="math inline">\(2.2\)</span>. We make the assumption that any future sampled datapoints will likely be similar in value to the data we’ve already drawn. This means that our <em>kernel</em> – our description of the probability of randomly sampling any new value – will be greatest at the datapoint we’ve already drawn but still have non-zero probability above and below it. The area under any kernel should integrate to 1, representing the total probability of drawing a new datapoint.</p>
 <p>A <strong>bandwidth value</strong>, usually denoted by <span class="math inline">\(\alpha\)</span>, represents the width of the kernel. A large value of <span class="math inline">\(\alpha\)</span> will result in a wide, short kernel function, while a small value will result in a narrow, tall kernel.</p>
 <p>Below, we place a <strong>Gaussian kernel</strong>, plotted in orange, over the datapoint <span class="math inline">\(2.2\)</span>. A Gaussian kernel is simply the normal distribution, which you may have called a bell curve in Data 8.</p>
-<div id="bd0757c4" class="cell" data-execution_count="6">
+<div id="c7c2ed81" class="cell" data-execution_count="6">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> gaussian_kernel(x, z, a):</span>
@@ -686,7 +686,7 @@ <h4 data-number="8.1.2.1" class="anchored" data-anchor-id="step-1-place-a-kernel
 </div>
 </div>
 <p>To begin creating our KDE, we place a kernel on <em>each</em> datapoint in our dataset. For our dataset of 5 points, we will have 5 kernels.</p>
-<div id="4745d166" class="cell" data-execution_count="7">
+<div id="185cc793" class="cell" data-execution_count="7">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># You will work with the functions below in Lab 4</span></span>
@@ -738,7 +738,7 @@ <h4 data-number="8.1.2.1" class="anchored" data-anchor-id="step-1-place-a-kernel
 <h4 data-number="8.1.2.2" class="anchored" data-anchor-id="step-2-normalize-kernels-to-have-a-total-area-of-1"><span class="header-section-number">8.1.2.2</span> Step 2: Normalize Kernels to Have a Total Area of 1</h4>
 <p>Above, we said that <em>each</em> kernel has an area of 1. Earlier, we also said that our goal is to construct a KDE curve using these kernels with a <em>total</em> area of 1. If we were to directly sum the kernels as they are, we would produce a KDE curve with an integrated area of (5 kernels) <span class="math inline">\(\times\)</span> (area of 1 each) = 5. To avoid this, we will <strong>normalize</strong> each of our kernels. This involves multiplying each kernel by <span class="math inline">\(\frac{1}{\#\:\text{datapoints}}\)</span>.</p>
 <p>In the cell below, we multiply each of our 5 kernels by <span class="math inline">\(\frac{1}{5}\)</span> to apply normalization.</p>
-<div id="a50d41c7" class="cell" data-execution_count="8">
+<div id="a040f9be" class="cell" data-execution_count="8">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>plt.xlim(<span class="op">-</span><span class="dv">3</span>, <span class="dv">10</span>)</span>
@@ -761,7 +761,7 @@ <h4 data-number="8.1.2.2" class="anchored" data-anchor-id="step-2-normalize-kern
 <section id="step-3-sum-the-normalized-kernels" class="level4" data-number="8.1.2.3">
 <h4 data-number="8.1.2.3" class="anchored" data-anchor-id="step-3-sum-the-normalized-kernels"><span class="header-section-number">8.1.2.3</span> Step 3: Sum the Normalized Kernels</h4>
 <p>Our KDE curve is the sum of the normalized kernels. Notice that the final curve is identical to the plot generated by <code>sns.kdeplot</code> we saw earlier!</p>
-<div id="a67ed712" class="cell" data-execution_count="9">
+<div id="d5d94876" class="cell" data-execution_count="9">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>plt.xlim(<span class="op">-</span><span class="dv">3</span>, <span class="dv">10</span>)</span>
@@ -871,7 +871,7 @@ <h4 data-number="8.1.3.2" class="anchored" data-anchor-id="boxcar-kernel"><span
         0, &amp; \text{else }
     \end{cases}\]</span></p>
 <p>The boxcar kernel is seldom used in practice – we include it here to demonstrate that a kernel function can take whatever form you would like, provided it integrates to 1 and does not output negative values.</p>
-<div id="df9494b3" class="cell" data-execution_count="10">
+<div id="2adf6b36" class="cell" data-execution_count="10">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> boxcar_kernel(alpha, x, z):</span>
@@ -918,7 +918,7 @@ <h2 data-number="8.2" class="anchored" data-anchor-id="diving-deeper-into-displo
 <p>As we saw earlier, we can use <code>seaborn</code>’s <code>displot</code> function to plot various distributions. In particular, <code>displot</code> allows you to specify the <code>kind</code> of plot and is a wrapper for <code>histplot</code>, <code>kdeplot</code>, and <code>ecdfplot</code>.</p>
 <p>Below, we can see a couple of examples of how <code>sns.displot</code> can be used to plot various distributions.</p>
 <p>First, we can plot a histogram by setting <code>kind</code> to <code>"hist"</code>. Note that here we’ve specified <code>stat = "density"</code> to normalize the histogram such that the area under the histogram is equal to 1.</p>
-<div id="3c406bfa" class="cell" data-execution_count="11">
+<div id="bd1c8378" class="cell" data-execution_count="11">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>sns.displot(data<span class="op">=</span>wb, </span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>            x<span class="op">=</span><span class="st">"gni"</span>, </span>
 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>            kind<span class="op">=</span><span class="st">"hist"</span>, </span>
@@ -933,7 +933,7 @@ <h2 data-number="8.2" class="anchored" data-anchor-id="diving-deeper-into-displo
 </div>
 </div>
 <p>Now, what if we want to generate a KDE plot? We can set <code>kind</code> to <code>"kde"</code>!</p>
-<div id="1505bd66" class="cell" data-execution_count="12">
+<div id="3f5e1c57" class="cell" data-execution_count="12">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>sns.displot(data<span class="op">=</span>wb, </span>
 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>            x<span class="op">=</span><span class="st">"gni"</span>, </span>
 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>            kind<span class="op">=</span><span class="st">'kde'</span>)</span>
@@ -947,7 +947,7 @@ <h2 data-number="8.2" class="anchored" data-anchor-id="diving-deeper-into-displo
 </div>
 </div>
 <p>And finally, if we want to generate an Empirical Cumulative Distribution Function (ECDF), we can specify <code>kind = "ecdf"</code>.</p>
-<div id="838db689" class="cell" data-execution_count="13">
+<div id="bd3882dd" class="cell" data-execution_count="13">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>sns.displot(data<span class="op">=</span>wb, </span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>            x<span class="op">=</span><span class="st">"gni"</span>, </span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>            kind<span class="op">=</span><span class="st">'ecdf'</span>)</span>
@@ -968,7 +968,7 @@ <h2 data-number="8.3" class="anchored" data-anchor-id="relationships-between-qua
 <h4 data-number="8.3.0.1" class="anchored" data-anchor-id="scatter-plots"><span class="header-section-number">8.3.0.1</span> Scatter Plots</h4>
 <p><strong>Scatter plots</strong> are one of the most useful tools in representing the relationship between <strong>pairs</strong> of quantitative variables. They are particularly important in gauging the strength, or correlation, of the relationship between variables. Knowledge of these relationships can then motivate decisions in our modeling process.</p>
 <p>In <code>matplotlib</code>, we use the function <code>plt.scatter</code> to generate a scatter plot. Notice that, unlike our examples of plotting single-variable distributions, now we specify sequences of values to be plotted along the x-axis <em>and</em> the y-axis.</p>
-<div id="25378ece" class="cell" data-execution_count="14">
+<div id="dc3c248d" class="cell" data-execution_count="14">
 <div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>plt.scatter(wb[<span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>], <span class="op">\</span></span>
 <span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>            wb[<span class="st">'Adult literacy rate: Female: % ages 15 and older: 2005-14'</span>])</span>
 <span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -984,7 +984,7 @@ <h4 data-number="8.3.0.1" class="anchored" data-anchor-id="scatter-plots"><span
 </div>
 </div>
 <p>In <code>seaborn</code>, we call the function <code>sns.scatterplot</code>. We use the <code>x</code> and <code>y</code> parameters to indicate the values to be plotted along the x and y axes, respectively. By using the <code>hue</code> parameter, we can specify a third variable to be used for coloring each scatter point.</p>
-<div id="7c8b0bc7" class="cell" data-execution_count="15">
+<div id="5cd2ce6d" class="cell" data-execution_count="15">
 <div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>sns.scatterplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>, <span class="op">\</span></span>
 <span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a>               y <span class="op">=</span> <span class="st">"Adult literacy rate: Female: % ages 15 and older: 2005-14"</span>, </span>
 <span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a>               hue <span class="op">=</span> <span class="st">"Continent"</span>)</span>
@@ -1007,7 +1007,7 @@ <h5 data-number="8.3.0.1.1" class="anchored" data-anchor-id="overplotting"><span
 <li><strong>Jittering</strong> is the process of adding a small amount of random noise to all x and y values to slightly shift the position of each datapoint. By randomly shifting all the data by some small distance, we can discern individual points more clearly without modifying the major trends of the original dataset.</li>
 </ul>
 <p>In the cell below, we first jitter the data using <code>np.random.uniform</code>, then re-plot it with smaller markers. The resulting plot is much easier to interpret.</p>
-<div id="0303ee3e" class="cell" data-execution_count="16">
+<div id="29695286" class="cell" data-execution_count="16">
 <div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Setting a seed ensures that we produce the same plot each time</span></span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="co"># This means that the course notes will not change each time you access them</span></span>
 <span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>np.random.seed(<span class="dv">150</span>)</span>
@@ -1041,7 +1041,7 @@ <h5 data-number="8.3.0.1.1" class="anchored" data-anchor-id="overplotting"><span
 <h4 data-number="8.3.0.2" class="anchored" data-anchor-id="lmplot-and-jointplot"><span class="header-section-number">8.3.0.2</span> <code>lmplot</code> and <code>jointplot</code></h4>
 <p><code>seaborn</code> also includes several built-in functions for creating more sophisticated scatter plots. Two of the most commonly used examples are <code>sns.lmplot</code> and <code>sns.jointplot</code>.</p>
 <p><code>sns.lmplot</code> plots both a scatter plot <em>and</em> a linear regression line, all in one function call. We’ll discuss linear regression in a few lectures.</p>
-<div id="756121b8" class="cell" data-execution_count="17">
+<div id="9ef6479f" class="cell" data-execution_count="17">
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>sns.lmplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>, <span class="op">\</span></span>
 <span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>           y <span class="op">=</span> <span class="st">"Adult literacy rate: Female: % ages 15 and older: 2005-14"</span>)</span>
 <span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1055,7 +1055,7 @@ <h4 data-number="8.3.0.2" class="anchored" data-anchor-id="lmplot-and-jointplot"
 </div>
 </div>
 <p><code>sns.jointplot</code> creates a visualization with three components: a scatter plot, a histogram of the distribution of x values, and a histogram of the distribution of y values.</p>
-<div id="cab6e820" class="cell" data-execution_count="18">
+<div id="8c775c4a" class="cell" data-execution_count="18">
 <div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>sns.jointplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>, <span class="op">\</span></span>
 <span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>           y <span class="op">=</span> <span class="st">"Adult literacy rate: Female: % ages 15 and older: 2005-14"</span>)</span>
 <span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1076,7 +1076,7 @@ <h4 data-number="8.3.0.3" class="anchored" data-anchor-id="hex-plots"><span clas
 <p>For datasets with a very large number of datapoints, jittering is unlikely to fully resolve the issue of overplotting. In these cases, we can attempt to visualize our data by its <em>density</em>, rather than displaying each individual datapoint.</p>
 <p><strong>Hex plots</strong> can be thought of as two-dimensional histograms that show the joint distribution between two variables. This is particularly useful when working with very dense data. In a hex plot, the x-y plane is binned into hexagons. Hexagons that are darker in color indicate a greater density of data – that is, there are more data points that lie in the region enclosed by the hexagon.</p>
 <p>We can generate a hex plot using <code>sns.jointplot</code> modified with the <code>kind</code> parameter.</p>
-<div id="28905805" class="cell" data-execution_count="19">
+<div id="c1c4126b" class="cell" data-execution_count="19">
 <div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>sns.jointplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>, <span class="op">\</span></span>
 <span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>              y <span class="op">=</span> <span class="st">"Adult literacy rate: Female: % ages 15 and older: 2005-14"</span>, <span class="op">\</span></span>
 <span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>              kind <span class="op">=</span> <span class="st">"hex"</span>)</span>
@@ -1097,7 +1097,7 @@ <h4 data-number="8.3.0.3" class="anchored" data-anchor-id="hex-plots"><span clas
 <h4 data-number="8.3.0.4" class="anchored" data-anchor-id="contour-plots"><span class="header-section-number">8.3.0.4</span> Contour Plots</h4>
 <p><strong>Contour plots</strong> are an alternative way of plotting the joint distribution of two variables. You can think of them as the 2-dimensional versions of KDE plots. A contour plot can be interpreted in a similar way to a <a href="https://gisgeography.com/contour-lines-topographic-map/">topographic map</a>. Each contour line represents an area that has the same <em>density</em> of datapoints throughout the region. Contours marked with darker colors contain more datapoints (a higher density) in that region.</p>
 <p><code>sns.kdeplot</code> will generate a contour plot if we specify both x and y data.</p>
-<div id="920ce145" class="cell" data-execution_count="20">
+<div id="835bc98b" class="cell" data-execution_count="20">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>sns.kdeplot(data <span class="op">=</span> wb, x <span class="op">=</span> <span class="st">"per capita: </span><span class="sc">% g</span><span class="st">rowth: 2016"</span>, <span class="op">\</span></span>
 <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>            y <span class="op">=</span> <span class="st">"Adult literacy rate: Female: % ages 15 and older: 2005-14"</span>, <span class="op">\</span></span>
 <span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>            fill <span class="op">=</span> <span class="va">True</span>)</span>
@@ -1119,7 +1119,7 @@ <h2 data-number="8.4" class="anchored" data-anchor-id="transformations"><span cl
 <p>Much of this was done to uncover insights in data, which will prove necessary when we begin building models of data later in the course. A strong graphical correlation between two variables hints at an underlying relationship that we may want to study in greater detail. However, relying on visual relationships alone is limiting - not all plots show association. The presence of outliers and other statistical anomalies makes it hard to interpret data.</p>
 <p><strong>Transformations</strong> are the process of manipulating data to find significant relationships between variables. These are often found by applying mathematical functions to variables that “transform” their range of possible values and highlight some previously hidden associations between data.</p>
 <p>To see why we may want to transform data, consider the following plot of adult literacy rates against gross national income.</p>
-<div id="4a2a0e15" class="cell" data-execution_count="21">
+<div id="62cd2178" class="cell" data-execution_count="21">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Some data cleaning to help with the next example</span></span>
@@ -1163,7 +1163,7 @@ <h3 data-number="8.4.1" class="anchored" data-anchor-id="linearization-and-apply
 </ul>
 <p>One function that produces this result is the <strong>log transformation</strong>. When we take the logarithm of a large number, the original number will decrease in magnitude dramatically. Conversely, when we take the logarithm of a small number, the original number does not change its value by as significant an amount (to illustrate this, consider the difference between <span class="math inline">\(\log{(100)} = 4.61\)</span> and <span class="math inline">\(\log{(10)} = 2.30\)</span>).</p>
 <p>In Data 100 (and most upper-division STEM classes), <span class="math inline">\(\log\)</span> is used to refer to the natural logarithm with base <span class="math inline">\(e\)</span>.</p>
-<div id="2171fdf1" class="cell" data-execution_count="22">
+<div id="8343c34f" class="cell" data-execution_count="22">
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># np.log takes the logarithm of an array or Series</span></span>
 <span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>plt.scatter(np.log(df[<span class="st">"inc"</span>]), df[<span class="st">"lit"</span>])</span>
 <span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1186,7 +1186,7 @@ <h3 data-number="8.4.1" class="anchored" data-anchor-id="linearization-and-apply
 <li>Not substantially alter the scaling of small values of y (we do not want to drastically modify the lower end of the y axis, which is already distributed evenly on the vertical scale).</li>
 </ul>
 <p>In this case, it is helpful to apply a <strong>power transformation</strong> – that is, raise our y values to a power. Let’s try raising our adult literacy rate values to the power of 4. Large values raised to the power of 4 will increase in magnitude proportionally much more than small values raised to the power of 4 (consider the difference between <span class="math inline">\(2^4 = 16\)</span> and <span class="math inline">\(200^4 = 1600000000\)</span>).</p>
-<div id="cc9c03b9" class="cell" data-execution_count="23">
+<div id="1fba9ae7" class="cell" data-execution_count="23">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Apply a log transformation to the x values and a power transformation to the y values</span></span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>plt.scatter(np.log(df[<span class="st">"inc"</span>]), df[<span class="st">"lit"</span>]<span class="op">**</span><span class="dv">4</span>)</span>
 <span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a></span>
@@ -1207,7 +1207,7 @@ <h3 data-number="8.4.1" class="anchored" data-anchor-id="linearization-and-apply
 <p><span class="math display">\[y^4 = m(\log{x}) + b\]</span></p>
 <p>Where <span class="math inline">\(m\)</span> represents the slope of the linear fit, while <span class="math inline">\(b\)</span> represents the intercept.</p>
 <p>The cell below computes <span class="math inline">\(m\)</span> and <span class="math inline">\(b\)</span> for our transformed data. We’ll discuss how this code was generated in a future lecture.</p>
-<div id="ac48c8a2" class="cell" data-execution_count="24">
+<div id="6824bbec" class="cell" data-execution_count="24">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># The code below fits a linear regression model. We'll discuss it at length in a future lecture</span></span>
@@ -1245,7 +1245,7 @@ <h3 data-number="8.4.1" class="anchored" data-anchor-id="linearization-and-apply
 <p>By rearranging the equation, we find a relationship between the untransformed variables <span class="math inline">\(x\)</span> and <span class="math inline">\(y\)</span>.</p>
 <p><span class="math display">\[y = [m(\log{x}) + b]^{(1/4)}\]</span></p>
 <p>When we plug in the values for <span class="math inline">\(m\)</span> and <span class="math inline">\(b\)</span> computed above, something interesting happens.</p>
-<div id="5bbed94a" class="cell" data-execution_count="25">
+<div id="0d3fda0d" class="cell" data-execution_count="25">
 <details class="code-fold">
 <summary>Code</summary>
 <div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Now, plug the values for m and b into the relationship between the untransformed x and y</span></span>
diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png
index c4eb5c60..3b217d8c 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png and b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-10-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-10-output-1.pdf
index f64ca895..884db26a 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-10-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-10-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-11-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-11-output-1.pdf
index edcbac07..aed7d425 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-11-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-11-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-12-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-12-output-1.pdf
index f908683f..33ac4fbe 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-12-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-12-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-13-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-13-output-1.pdf
index 16ff6ef7..67b2a61d 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-13-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-13-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-14-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-14-output-1.pdf
index 8bf1d82b..a9f2a517 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-14-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-14-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-15-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-15-output-1.pdf
index d707dc7d..4f6d27d5 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-15-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-15-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-16-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-16-output-1.pdf
index faa99204..ac2ef14e 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-16-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-16-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-17-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-17-output-1.pdf
index 1b501101..128dc983 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-17-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-17-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-18-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-18-output-1.pdf
index 481432ee..85179c76 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-18-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-18-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-19-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-19-output-1.pdf
index b6bee853..26a13635 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-19-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-19-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-20-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-20-output-1.pdf
index f0bf1f81..0a9b9bde 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-20-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-20-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-21-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-21-output-1.pdf
index 77855d09..1d27ac2d 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-21-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-21-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-22-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-22-output-1.pdf
index 0e7075a4..baed5647 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-22-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-22-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-23-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-23-output-1.pdf
index de34d72b..23add7ed 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-23-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-23-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-24-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-24-output-1.pdf
index 2e14b281..5a9cadd7 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-24-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-24-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-25-output-2.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-25-output-2.pdf
index f45d7458..85027ba1 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-25-output-2.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-25-output-2.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-26-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-26-output-1.pdf
index 0662a22a..6ad514b9 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-26-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-26-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-3-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-3-output-1.pdf
index 4efaeb70..e5fc0d22 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-3-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-3-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-4-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-4-output-1.pdf
index 1e738777..b22baae5 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-4-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-4-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-5-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-5-output-1.pdf
index 2e061576..9bf808c2 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-5-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-5-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-6-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-6-output-1.pdf
index 8b145578..8537aaf6 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-6-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-6-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-7-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-7-output-1.pdf
index d4b63854..3f7b75ca 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-7-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-7-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-8-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-8-output-1.pdf
index bcdb265d..fdb394fd 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-8-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-8-output-1.pdf differ
diff --git a/docs/visualization_2/visualization_2_files/figure-pdf/cell-9-output-1.pdf b/docs/visualization_2/visualization_2_files/figure-pdf/cell-9-output-1.pdf
index 799e6c0e..c29f7973 100644
Binary files a/docs/visualization_2/visualization_2_files/figure-pdf/cell-9-output-1.pdf and b/docs/visualization_2/visualization_2_files/figure-pdf/cell-9-output-1.pdf differ
diff --git a/gradient_descent/gradient_descent.qmd b/gradient_descent/gradient_descent.qmd
index c9045a91..8d7a6d0a 100644
--- a/gradient_descent/gradient_descent.qmd
+++ b/gradient_descent/gradient_descent.qmd
@@ -46,6 +46,97 @@ from sklearn.linear_model import LinearRegression
 pd.options.mode.chained_assignment = None  # default='warn'
 ```
 
+## OLS Recap
+
+### 1. Choose a model
+Recall that when using multiple linear regression, we can generate a prediction for each of our $n$ data points:
+
+$$\hat{y} =\theta_{0} + \theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}$$
+
+<div align="middle">
+  <table style="width:100%">
+    <tr align="center">
+      <td><img src="images/ols_matrices_old.png" alt='ols_matrices_old' width='600'>
+      </td>
+    </tr>
+  </table>
+</div>
+
+In the previous lecture, we used p+1 features to account for the intercept, $\theta_0$.  This makes slides and notation messy.  
+Let’s redefine **p as the number of columns in our covariate matrix** and **add a column of 1s** to encode the intercept (if desired). If we choose to add a column of 1s, then $x_1$ can be a 1 for every data point.
+
+$$\hat{y} =\theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}$$
+
+<div align="middle">
+  <table style="width:100%">
+    <tr align="center">
+      <td><img src="images/ols_matrices_new.png" alt='ols_matrices_new' width='600'>
+      </td>
+    </tr>
+  </table>
+</div>
+
+### 2. Choose a loss function
+
+Recall that we then choose the mean squared error loss function shown below where the prediction vector $\hat{\mathbb{Y}}$ depends on $\theta$.
+$$R(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2 = \frac{1}{n} (||\mathbb{Y} - \hat{\mathbb{Y}}||_2)^2$$
+
+### 3. Fit the model
+
+We can then minimize the average loss with calculus or geometry. See the previous lecture for a geometric derivation of the normal equation ($\mathbb{X}^T \mathbb{X} \hat{\theta} = \mathbb{X}^T \mathbb{Y}$). Below, we can see what the matrices look like under our new notation, where $\mathbb{X}$ is now an $n$ by $p$ matrix instead of an $n$ by $p+1$ matrix.
+
+<div align="middle">
+  <table style="width:100%">
+    <tr align="center">
+      <td><img src="images/ols_solution_matrices.png" alt='ols_solution_matrices' width='400'>
+      </td>
+    </tr>
+  </table>
+</div>
+
+To summarize:
+
+|   | Model | Estimate | Unique? |
+| -- | -- | -- |  -- | 
+| Constant Model + MSE | $\hat{y} = \theta_0$| $\hat{\theta}_0 = mean(y) = \bar{y}$ | **Yes**. Any set of values has a unique mean.
+| Constant Model + MAE | $\hat{y} = \theta_0$  | $\hat{\theta}_0 = median(y)$ | **Yes**, if the number of data points is odd. **No**, if it is even: any value between the middle 2 values minimizes the MAE, so by convention we return their average.
+| Simple Linear Regression + MSE | $\hat{y} = \theta_0 + \theta_1x$| $\hat{\theta}_0 = \bar{y} - \hat{\theta}_1\bar{x}$ $\hat{\theta}_1 = r\frac{\sigma_y}{\sigma_x}$| **Yes**. Any set of non-constant* values has a unique mean, SD, and correlation coefficient.
+| **OLS** (Linear Model + MSE) | $\mathbb{\hat{Y}} = \mathbb{X}\mathbb{\theta}$| $\hat{\theta} = (\mathbb{X}^T\mathbb{X})^{-1}\mathbb{X}^T\mathbb{Y}$  | **Yes**, if $\mathbb{X}$ is full column rank (all columns are linearly independent, which requires # of datapoints $\geq$ # of features).
+
+#### Uniqueness of a Solution
+
+In most settings, the number of observations ($n$) is much greater than the number of features ($p$). Note that at least one solution always exists because intuitively, we can always draw a line of best fit for a given set of data, but there may be multiple lines that are “equally good”. (Formal proof is beyond this course.) Let's now revisit the interpretation for uniqueness of a solution at the end of the last lecture, but with the new notation of $p$ instead of $p+1$ features.
+
+The Least Squares estimate $\hat{\theta}$ is **unique** if and only if $\mathbb{X}$ is **full column rank**.
+
+::: {.callout}
+Proof: 
+
+* We know the solution to the normal equation $\mathbb{X}^T\mathbb{X}\hat{\theta} = \mathbb{X}^T\mathbb{Y}$ is the least square estimate that minimizes the squared loss.
+* The normal equation has a **unique** solution $\hat{\theta}$ $\iff$ the square matrix $\mathbb{X}^T\mathbb{X}$ is **invertible** $\iff$ $\mathbb{X}^T\mathbb{X}$ is full rank.
+  * The **column rank** of a square matrix is the max number of linearly independent columns it contains.
+  * An $n$ x $n$ square matrix is deemed full column rank when all of its columns are linearly independent. That is, its rank would be equal to $n$.
+  * $\mathbb{X}^T\mathbb{X}$ has shape $p \times p$, and therefore has max rank $p$. 
+* $rank(\mathbb{X}^T\mathbb{X})$ = $rank(\mathbb{X})$ (proof out of scope).
+* Therefore, $\mathbb{X}^T\mathbb{X}$ has rank $p$ $\iff$  $\mathbb{X}$ has rank $p$ $\iff \mathbb{X}$ is full column rank.
+:::
+
+Therefore, if $\mathbb{X}$ is not full column rank, we will not have unique estimates. This can happen for two major reasons.
+
+1. If our design matrix $\mathbb{X}$ is "**wide**":
+    * If n < p, then we have more features (columns) than observations (rows).
+    * Then $rank(\mathbb{X}) \leq$ min(n, p) = n < p, so $\hat{\theta}$ is not unique.
+    * Typically we have n >> p so this is less of an issue.
+
+2. If our design matrix $\mathbb{X}$ has features that are **linear combinations** of other features:
+    * By definition, the rank of $\mathbb{X}$ is the maximum number of linearly independent columns in $\mathbb{X}$.
+    * Example: If “Width”, “Height”, and “Perimeter” are all columns,
+      * Perimeter = 2 * Width + 2 * Height  $\rightarrow$  $\mathbb{X}$ is not full rank.
+    * Important with one-hot encoding (to discuss later).
+
+
+Let's now explore how to use the normal equations with a real-world dataset in the next section.
+
 ## `sklearn`
 ### Implementing Derived Formulas in Code
 
diff --git a/gradient_descent/images/ols_matrices_new.png b/gradient_descent/images/ols_matrices_new.png
new file mode 100644
index 00000000..f43c690e
Binary files /dev/null and b/gradient_descent/images/ols_matrices_new.png differ
diff --git a/gradient_descent/images/ols_matrices_old.png b/gradient_descent/images/ols_matrices_old.png
new file mode 100644
index 00000000..52ff46ff
Binary files /dev/null and b/gradient_descent/images/ols_matrices_old.png differ
diff --git a/gradient_descent/images/ols_solution_matrices.png b/gradient_descent/images/ols_solution_matrices.png
new file mode 100644
index 00000000..e3c7a907
Binary files /dev/null and b/gradient_descent/images/ols_solution_matrices.png differ
diff --git a/index.tex b/index.tex
index f35692bd..1e03ac1e 100644
--- a/index.tex
+++ b/index.tex
@@ -247,7 +247,7 @@ \section*{About the Course Notes}\label{about-the-course-notes}
 
 \chapter{Introduction}\label{introduction}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -301,7 +301,7 @@ \chapter{Introduction}\label{introduction}
 allowing you to take data and produce useful insights on the world's
 most challenging and ambiguous problems.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Course Goals}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Course Goals}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -319,7 +319,7 @@ \chapter{Introduction}\label{introduction}
 
 \end{tcolorbox}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Some Topics We'll Cover}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Some Topics We'll Cover}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -349,7 +349,7 @@ \chapter{Introduction}\label{introduction}
 
 \end{tcolorbox}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Prerequisites}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Prerequisites}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 To ensure that you can get the most out of the course content, please
 make sure that you are familiar with:
@@ -580,7 +580,7 @@ \section{Conclusion}\label{conclusion}
 
 \chapter{Pandas I}\label{pandas-i}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -1920,7 +1920,7 @@ \section{Parting Note}\label{parting-note}
 
 \chapter{Pandas II}\label{pandas-ii}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -2691,7 +2691,7 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}
 \endhead
 \bottomrule\noalign{}
 \endlastfoot
-325441 & CA & M & 1993 & Tristan & 63 \\
+107251 & CA & F & 1988 & Marrissa & 14 \\
 \end{longtable}
 
 Naturally, this can be chained with other methods and operators
@@ -2711,11 +2711,11 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}
 \endhead
 \bottomrule\noalign{}
 \endlastfoot
-157851 & 2002 & Melannie & 23 \\
-154714 & 2001 & Leana & 12 \\
-7620 & 1925 & Hiroko & 6 \\
-239231 & 2022 & Hanalei & 5 \\
-86018 & 1981 & Diane & 175 \\
+18984 & 1942 & Marylou & 36 \\
+94717 & 1984 & Lina & 30 \\
+378292 & 2012 & Helios & 5 \\
+360928 & 2006 & Sachin & 5 \\
+84949 & 1980 & Mayela & 8 \\
 \end{longtable}
 
 \begin{Shaded}
@@ -2732,10 +2732,10 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}
 \endhead
 \bottomrule\noalign{}
 \endlastfoot
-150330 & 2000 & Marin & 21 \\
-152796 & 2000 & Vivi & 5 \\
-344732 & 2000 & Florentino & 5 \\
-344715 & 2000 & Elizabeth & 5 \\
+344251 & 2000 & Javen & 7 \\
+152159 & 2000 & Marilynn & 6 \\
+150308 & 2000 & Divya & 21 \\
+152369 & 2000 & Carrissa & 5 \\
 \end{longtable}
 
 \subsection{\texorpdfstring{\texttt{.value\_counts()}}{.value\_counts()}}\label{value_counts}
@@ -2857,7 +2857,7 @@ \section{Parting Note}\label{parting-note-1}
 
 \chapter{Pandas III}\label{pandas-iii}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -3127,7 +3127,7 @@ \section{\texorpdfstring{Aggregating Data with
 \end{Shaded}
 
 \begin{verbatim}
-<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10c76d940>
+<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10fe24dd0>
 \end{verbatim}
 
 What does this strange output mean? Calling \texttt{.groupby}
@@ -3467,7 +3467,7 @@ \subsection{Plotting Birth Counts}\label{plotting-birth-counts}
 \end{Shaded}
 
 \begin{verbatim}
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48374/390646742.py:1: FutureWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51680/390646742.py:1: FutureWarning:
 
 The provided callable <built-in function sum> is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "sum" instead.
 \end{verbatim}
@@ -4118,7 +4118,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda}
 \end{Shaded}
 
 \begin{verbatim}
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48374/4278286395.py:1: FutureWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51680/4278286395.py:1: FutureWarning:
 
 The provided callable <built-in function max> is currently using DataFrameGroupBy.max. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "max" instead.
 \end{verbatim}
@@ -4347,7 +4347,7 @@ \section{Aggregating Data with Pivot
 \end{Shaded}
 
 \begin{verbatim}
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48374/3186035650.py:3: FutureWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51680/3186035650.py:3: FutureWarning:
 
 The provided callable <built-in function sum> is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string "sum" instead.
 \end{verbatim}
@@ -4674,7 +4674,7 @@ \chapter{Data Cleaning and EDA}\label{data-cleaning-and-eda}
 \end{Highlighting}
 \end{Shaded}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -5518,7 +5518,7 @@ \subsubsection{\texorpdfstring{Temporality with \texttt{pandas}'
 \end{Shaded}
 
 \begin{verbatim}
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48400/874729699.py:1: UserWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51717/874729699.py:1: UserWarning:
 
 Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
 \end{verbatim}
@@ -6801,7 +6801,7 @@ \subsection{Exploring Variable Feature
 
 invalid escape sequence '\s'
 
-/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_48400/150137587.py:3: SyntaxWarning:
+/var/folders/ks/dgd81q6j5b7ghm1zc_4483vr0000gn/T/ipykernel_51717/150137587.py:3: SyntaxWarning:
 
 invalid escape sequence '\s'
 \end{verbatim}
@@ -7359,7 +7359,7 @@ \subsection{EDA and Data Wrangling}\label{eda-and-data-wrangling}
 
 \chapter{Regular Expressions}\label{regular-expressions}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -8578,7 +8578,7 @@ \section{Limitations of Regular
 
 \chapter{Visualization I}\label{visualization-i}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -9368,7 +9368,7 @@ \subsubsection{Evaluating Histograms}\label{evaluating-histograms}
 
 \chapter{Visualization II}\label{visualization-ii}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -10765,7 +10765,7 @@ \subsection{Harnessing Context}\label{harnessing-context}
 
 \chapter{Sampling}\label{sampling}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -11351,7 +11351,7 @@ \subsubsection{Simple Random Sample}\label{simple-random-sample}
 \end{Shaded}
 
 \begin{verbatim}
-np.float64(0.5295968792084498)
+np.float64(0.5297165243909225)
 \end{verbatim}
 
 This is very close to the actual vote of 0.5302792307692308!
@@ -11375,8 +11375,8 @@ \subsubsection{Simple Random Sample}\label{simple-random-sample}
 \end{Highlighting}
 \end{Shaded}
 
-\textbf{Actual} = 0.5303, \textbf{Sample} = 0.5275, \textbf{Err} =
-0.52\%.
+\textbf{Actual} = 0.5303, \textbf{Sample} = 0.5088, \textbf{Err} =
+4.06\%.
 
 We'll learn how to choose this number when we (re)learn the Central
 Limit Theorem later in the semester.
@@ -11424,7 +11424,7 @@ \subsubsection{Quantifying Chance Error}\label{quantifying-chance-error}
 \end{Shaded}
 
 \begin{verbatim}
-np.float64(0.943)
+np.float64(0.963)
 \end{verbatim}
 
 You can see the curve looks roughly Gaussian/normal. Using KDE:
@@ -11455,7 +11455,7 @@ \section{Summary}\label{summary-1}
 
 \chapter{Introduction to Modeling}\label{introduction-to-modeling}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -11797,7 +11797,7 @@ \subsection{Derivation}\label{derivation}
   \(\hat{a} = \text{average of }y - \text{slope}\cdot\text{average of }x\)
 \end{itemize}
 
-\begin{tcolorbox}[enhanced jigsaw, breakable, opacityback=0, rightrule=.15mm, colback=white, leftrule=.75mm, left=2mm, colframe=quarto-callout-color-frame, bottomrule=.15mm, arc=.35mm, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, arc=.35mm, leftrule=.75mm, left=2mm, bottomrule=.15mm, rightrule=.15mm, colframe=quarto-callout-color-frame, opacityback=0, breakable, toprule=.15mm, colback=white]
 
 Proof:
 
@@ -12446,7 +12446,7 @@ \subsection{Four Mysterious Datasets (Anscombe's
 \chapter{Constant Model, Loss, and
 Transformations}\label{constant-model-loss-and-transformations}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -13431,7 +13431,7 @@ \section{Bonus: Calculating Constant Model MSE Using an Algebraic
 
 \chapter{Ordinary Least Squares}\label{ordinary-least-squares}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -13605,7 +13605,7 @@ \subsection{Multiple Linear
 
 \subsection{Linear Algebra Approach}\label{linear-algebra-approach}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Vector Dot Product}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Vector Dot Product}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 The \textbf{dot product (or inner product)} is a vector operation that:
 
@@ -13703,7 +13703,7 @@ \subsection{Linear Algebra Approach}\label{linear-algebra-approach}
 \(\mathbb{Y}\) is also a vector with \(n\) elements
 (\(\mathbb{Y} \in \mathbb{R}^{n}\)).
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Linearity}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Linearity}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 An expression is \textbf{linear in \(\theta\)} (a set of parameters) if
 it is a linear combination of the elements of the set. Checking if an
@@ -13747,7 +13747,7 @@ \subsection{Mean Squared Error}\label{mean-squared-error}
 indication of how ``far away'' the predictions are from the true values,
 on average.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: L2 Norm}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: L2 Norm}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 When working with vectors, this idea of ``distance'' or the vector's
 \textbf{size/length} is represented by the \textbf{norm}. More
@@ -13893,7 +13893,7 @@ \subsection{A Note on Terminology for Multiple Linear
 
 \section{Geometric Derivation}\label{geometric-derivation}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Span}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Span}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 Recall that the \textbf{span} or \textbf{column space} of a matrix
 \(\mathbb{X}\) (denoted \(span(\mathbb{X})\)) is the set of all possible
@@ -13905,7 +13905,7 @@ \section{Geometric Derivation}\label{geometric-derivation}
 
 \end{tcolorbox}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Matrix-Vector Multiplication}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Matrix-Vector Multiplication}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 There are 2 ways we can think about matrix-vector multiplication
 
@@ -13994,7 +13994,7 @@ \section{Geometric Derivation}\label{geometric-derivation}
 visualize this as the vector created by dropping a perpendicular line
 from \(\mathbb{Y}\) onto the span of \(\mathbb{X}\).
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Orthogonality}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Orthogonality}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 Recall that two vectors \(\vec{a}\) and \(\vec{b}\) are orthogonal if
 their dot product is zero: \(\vec{a}^{T}\vec{b} = 0\).
@@ -14109,7 +14109,7 @@ \subsection{Residuals}\label{residuals}
 
 \[\mathbb{X}^Te = 0 \]
 
-\begin{tcolorbox}[enhanced jigsaw, breakable, opacityback=0, rightrule=.15mm, colback=white, leftrule=.75mm, left=2mm, colframe=quarto-callout-color-frame, bottomrule=.15mm, arc=.35mm, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, arc=.35mm, leftrule=.75mm, left=2mm, bottomrule=.15mm, rightrule=.15mm, colframe=quarto-callout-color-frame, opacityback=0, breakable, toprule=.15mm, colback=white]
 
 Proof:
 
@@ -14143,7 +14143,7 @@ \subsection{The Bias/Intercept Term}\label{the-biasintercept-term}
 
 \[\sum_i^n e_i = 0\]
 
-\begin{tcolorbox}[enhanced jigsaw, breakable, opacityback=0, rightrule=.15mm, colback=white, leftrule=.75mm, left=2mm, colframe=quarto-callout-color-frame, bottomrule=.15mm, arc=.35mm, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, arc=.35mm, leftrule=.75mm, left=2mm, bottomrule=.15mm, rightrule=.15mm, colframe=quarto-callout-color-frame, opacityback=0, breakable, toprule=.15mm, colback=white]
 
 Proof:
 
@@ -14221,7 +14221,7 @@ \subsection{Uniqueness of the OLS
 The Least Squares estimate \(\hat{\theta}\) is \textbf{unique} if and
 only if \(\mathbb{X}\) is \textbf{full column rank}.
 
-\begin{tcolorbox}[enhanced jigsaw, breakable, opacityback=0, rightrule=.15mm, colback=white, leftrule=.75mm, left=2mm, colframe=quarto-callout-color-frame, bottomrule=.15mm, arc=.35mm, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, arc=.35mm, leftrule=.75mm, left=2mm, bottomrule=.15mm, rightrule=.15mm, colframe=quarto-callout-color-frame, opacityback=0, breakable, toprule=.15mm, colback=white]
 
 Proof:
 
@@ -14310,7 +14310,7 @@ \subsection{Uniqueness of the OLS
 \chapter{sklearn and Gradient
 Descent}\label{sklearn-and-gradient-descent}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -14339,6 +14339,179 @@ \chapter{sklearn and Gradient
 \end{Highlighting}
 \end{Shaded}
 
+\section{OLS Recap}\label{ols-recap}
+
+\subsection{1. Choose a model}\label{choose-a-model}
+
+Recall that when using multiple linear regression, we can generate a
+prediction for each of our \(n\) data points:
+
+\[\hat{y} =\theta_{0} + \theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}\]
+
+In the previous lecture, we used \(p+1\) features to account for the
+intercept, \(\theta_0\). This makes slides and notation messy.\\
+Let's redefine \textbf{\(p\) as the number of columns in our covariate
+matrix} and \textbf{add a column of 1s} to encode the intercept (if
+desired). If we choose to add a column of 1s, then \(x_1 = 1\) for
+every data point and \(\theta_1\) plays the role of the intercept.
+
+\[\hat{y} =\theta_{1}x_{1} + \theta_{2}x_{2} + ... + \theta_{p}x_{p}\]
+
+\subsection{2. Choose a loss function}\label{choose-a-loss-function}
+
+Recall that we then choose the mean squared error loss function shown
+below where the prediction vector \(\hat{\mathbb{Y}}\) depends on
+\(\theta\).
+\[R(\theta) = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2 = \frac{1}{n} (||\mathbb{Y} - \hat{\mathbb{Y}}||_2)^2\]
+
+\subsection{3. Fit the model}\label{fit-the-model}
+
+We can then minimize the average loss with calculus or geometry. See the
+previous lecture for a derivation of the Normal Equation
+(\(\mathbb{X}^T \mathbb{X} \hat{\theta} = \mathbb{X}^T \mathbb{Y}\))
+using geometry. We can see what the matrices look like with our new
+interpretation where \(\mathbb{X}\) is now an \(n\) by \(p\) matrix
+instead of an \(n\) by \(p+1\) matrix.
+
+To summarize:
+
+\begin{longtable}[]{@{}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.2500}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.2500}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.2500}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.2500}}@{}}
+\toprule\noalign{}
+\begin{minipage}[b]{\linewidth}\raggedright
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Model
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Estimate
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Unique?
+\end{minipage} \\
+\midrule\noalign{}
+\endhead
+\bottomrule\noalign{}
+\endlastfoot
+Constant Model + MSE & \(\hat{y} = \theta_0\) &
+\(\hat{\theta}_0 = mean(y) = \bar{y}\) & \textbf{Yes}. Any set of values
+has a unique mean. \\
+Constant Model + MAE & \(\hat{y} = \theta_0\) &
+\(\hat{\theta}_0 = median(y)\) & \textbf{Yes}, if \(n\) is odd. \textbf{No}, if
+\(n\) is even; by convention, return the average of the middle 2 values. \\
+Simple Linear Regression + MSE & \(\hat{y} = \theta_0 + \theta_1x\) &
+\(\hat{\theta}_0 = \bar{y} - \hat{\theta}_1\bar{x}\)
+\(\hat{\theta}_1 = r\frac{\sigma_y}{\sigma_x}\) & \textbf{Yes}. Any set
+of non-constant* values has a unique mean, SD, and correlation
+coefficient. \\
+\textbf{OLS} (Linear Model + MSE) &
+\(\mathbb{\hat{Y}} = \mathbb{X}\mathbb{\theta}\) &
+\(\hat{\theta} = (\mathbb{X}^T\mathbb{X})^{-1}\mathbb{X}^T\mathbb{Y}\) &
+\textbf{Yes}, if \(\mathbb{X}\) is full column rank (all columns are
+linearly independent, which requires \# of datapoints
+\(\geq\) \# of features). \\
+\end{longtable}
+
+\subsubsection{Uniqueness of a Solution}\label{uniqueness-of-a-solution}
+
+In most settings, the number of observations (\(n\)) is much greater
+than the number of features (\(p\)). Note that at least one solution
+always exists because intuitively, we can always draw a line of best fit
+for a given set of data, but there may be multiple lines that are
+``equally good''. (Formal proof is beyond this course.) Let's now
+revisit the interpretation for uniqueness of a solution at the end of
+the last lecture, but with the new notation of \(p\) instead of \(p+1\)
+features.
+
+The Least Squares estimate \(\hat{\theta}\) is \textbf{unique} if and
+only if \(\mathbb{X}\) is \textbf{full column rank}.
+
+\begin{tcolorbox}[enhanced jigsaw, arc=.35mm, leftrule=.75mm, left=2mm, bottomrule=.15mm, rightrule=.15mm, colframe=quarto-callout-color-frame, opacityback=0, breakable, toprule=.15mm, colback=white]
+
+Proof:
+
+\begin{itemize}
+\tightlist
+\item
+  We know the solution to the normal equation
+  \(\mathbb{X}^T\mathbb{X}\hat{\theta} = \mathbb{X}^T\mathbb{Y}\) is the
+  least squares estimate that minimizes the squared loss.
+\item
+  The normal equation has a \textbf{unique} solution \(\hat{\theta}\) \(\iff\) the square
+  matrix \(\mathbb{X}^T\mathbb{X}\) is \textbf{invertible} \(\iff\)
+  \(\mathbb{X}^T\mathbb{X}\) is full rank.
+
+  \begin{itemize}
+  \tightlist
+  \item
+    The \textbf{column rank} of a square matrix is the max number of
+    linearly independent columns it contains.
+  \item
+    An \(n\) x \(n\) square matrix is deemed full column rank when all
+    of its columns are linearly independent. That is, its rank would be
+    equal to \(n\).
+  \item
+    \(\mathbb{X}^T\mathbb{X}\) has shape \(p \times p\), and therefore
+    has max rank \(p\).
+  \end{itemize}
+\item
+  \(rank(\mathbb{X}^T\mathbb{X})\) = \(rank(\mathbb{X})\) (proof out of
+  scope).
+\item
+  Therefore, \(\mathbb{X}^T\mathbb{X}\) has rank \(p\) \(\iff\)
+  \(\mathbb{X}\) has rank \(p\) \(\iff \mathbb{X}\) is full column rank.
+\end{itemize}
+
+\end{tcolorbox}
+
+Therefore, if \(\mathbb{X}\) is not full column rank, we will not have
+unique estimates. This can happen for two major reasons.
+
+\begin{enumerate}
+\def\labelenumi{\arabic{enumi}.}
+\tightlist
+\item
+  If our design matrix \(\mathbb{X}\) is ``\textbf{wide}'':
+
+  \begin{itemize}
+  \tightlist
+  \item
+    If \(n < p\), then we have more features (columns) than
+    observations (rows).
+  \item
+    Then \(rank(\mathbb{X}) \leq \min(n, p) = n < p\), so
+    \(\hat{\theta}\) is not unique.
+  \item
+    Typically we have n \textgreater\textgreater{} p so this is less of
+    an issue.
+  \end{itemize}
+\item
+  If our design matrix \(\mathbb{X}\) has features that are
+  \textbf{linear combinations} of other features:
+
+  \begin{itemize}
+  \tightlist
+  \item
+    By definition, the rank of \(\mathbb{X}\) is the maximum number of
+    linearly independent columns in \(\mathbb{X}\).
+  \item
+    Example: If ``Width'', ``Height'', and ``Perimeter'' are all
+    columns,
+
+    \begin{itemize}
+    \tightlist
+    \item
+      Perimeter = 2 * Width + 2 * Height \(\rightarrow\) \(\mathbb{X}\)
+      is not full column rank.
+    \end{itemize}
+  \item
+    Important with one-hot encoding (to discuss later).
+  \end{itemize}
+\end{enumerate}
+
+Let's now explore how to use the normal equations with a real-world
+dataset in the next section.
+
 \section{\texorpdfstring{\texttt{sklearn}}{sklearn}}\label{sklearn}
 
 \subsection{Implementing Derived Formulas in
@@ -15210,7 +15383,7 @@ \subsubsection{\texorpdfstring{Gradient Descent on the \texttt{tips}
 
 \chapter{Feature Engineering}\label{feature-engineering}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -15358,7 +15531,7 @@ \subsubsection{The Gradient Vector}\label{the-gradient-vector}
 On a 2D (or higher) surface, the best way to go down (gradient) is
 described by a \emph{vector}.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Aside: Partial Derivatives}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Aside: Partial Derivatives}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -15929,7 +16102,7 @@ \section{Complexity and Overfitting}\label{complexity-and-overfitting}
 we can improve model performance by designing increasingly complex
 models.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Fact: Polynomial Degrees}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Fact: Polynomial Degrees}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 Given \(N\) overlapping data points, we can always find a polynomial of
 degree \(N-1\) that goes through all those points.
@@ -16058,7 +16231,7 @@ \chapter{Case Study in Human Contexts and
 understand the material. The course notes will have the same broader
 structure but are by no means comprehensive.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -16277,7 +16450,7 @@ \section{The Response: Cook County Open Data
 \subsection{1. Question/Problem
 Formulation}\label{questionproblem-formulation}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -16317,7 +16490,7 @@ \subsection{1. Question/Problem
 can determine some metrics of success and frame a social problem as a
 data science problem.
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Definitions: Fairness and Transparency}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Definitions: Fairness and Transparency}, colbacktitle=quarto-callout-tip-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-tip-color-frame, toprule=.15mm, colback=white]
 
 The definitions, as given by the Cook County Assessor's Office, are
 given below:
@@ -16409,7 +16582,7 @@ \subsection{1. Question/Problem
 \subsection{2. Data Acquisition and
 Cleaning}\label{data-acquisition-and-cleaning}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -16485,7 +16658,7 @@ \subsection{2. Data Acquisition and
 \subsection{3. Exploratory Data
 Analysis}\label{exploratory-data-analysis}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -16529,7 +16702,7 @@ \subsection{3. Exploratory Data
 
 \subsection{4. Prediction and Inference}\label{prediction-and-inference}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist
@@ -16590,7 +16763,7 @@ \subsection{4. Prediction and Inference}\label{prediction-and-inference}
 
 \subsection{5. Results and Conclusions}\label{results-and-conclusions}
 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, rightrule=.15mm, toptitle=1mm, bottomrule=.15mm, breakable, opacityback=0, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, colback=white, leftrule=.75mm, bottomtitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, arc=.35mm, opacitybacktitle=0.6, coltitle=black, toprule=.15mm]
+\begin{tcolorbox}[enhanced jigsaw, coltitle=black, leftrule=.75mm, left=2mm, bottomrule=.15mm, bottomtitle=1mm, opacityback=0, breakable, arc=.35mm, opacitybacktitle=0.6, toptitle=1mm, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, colbacktitle=quarto-callout-note-color!10!white, titlerule=0mm, rightrule=.15mm, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white]
 
 \begin{itemize}
 \tightlist