diff --git a/_quarto.yml b/_quarto.yml index bcc0d7a11..e9340bc24 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -28,7 +28,7 @@ book: - intro_to_modeling/intro_to_modeling.qmd - constant_model_loss_transformations/loss_transformations.qmd - ols/ols.qmd - # - gradient_descent/gradient_descent.qmd + - gradient_descent/gradient_descent.qmd # - feature_engineering/feature_engineering.qmd # - case_study_HCE/case_study_HCE.qmd # - cv_regularization/cv_reg.qmd diff --git a/docs/constant_model_loss_transformations/loss_transformations.html b/docs/constant_model_loss_transformations/loss_transformations.html index 07cc2058c..1fe95e7ab 100644 --- a/docs/constant_model_loss_transformations/loss_transformations.html +++ b/docs/constant_model_loss_transformations/loss_transformations.html @@ -252,6 +252,12 @@ 12  Ordinary Least Squares + + @@ -459,7 +465,7 @@

+
Code
import numpy as np
@@ -474,7 +480,7 @@ 

data_linear = dugongs[["Length", "Age"]]

-
+
Code
# Big font helper
@@ -496,7 +502,7 @@ 

plt.style.use("default") # Revert style to default mpl

-
+
Code
# Constant Model + MSE
@@ -529,7 +535,7 @@ 

+
Code
# SLR + MSE
@@ -592,7 +598,7 @@ 

+
Code
# Predictions
@@ -604,7 +610,7 @@ 

yhats_linear = [theta_0_hat + theta_1_hat * x for x in xs]

-
+
Code
# Constant Model Rug Plot
@@ -634,7 +640,7 @@ 

+
Code
# SLR model scatter plot 
@@ -748,7 +754,7 @@ 

11.4 Comparing Loss Functions

We’ve now tried our hand at fitting a model under both MSE and MAE cost functions. How do the two results compare?

Let’s consider a dataset where each entry represents the number of drinks sold at a bubble tea store each day. We’ll fit a constant model to predict the number of drinks that will be sold tomorrow.

-
+
drinks = np.array([20, 21, 22, 29, 33])
 drinks
@@ -756,7 +762,7 @@

+
np.mean(drinks), np.median(drinks)
(np.float64(25.0), np.float64(22.0))
@@ -766,7 +772,7 @@

Notice that the MSE above is a smooth function – it is differentiable at all points, making it easy to minimize using numerical methods. The MAE, in contrast, is not differentiable at its “kinks,” the sharp corners where its slope changes abruptly. We’ll explore how the smoothness of the cost function can impact our ability to apply numerical optimization in a few weeks.
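To see these optima concretely, we can evaluate both cost functions on a grid of candidate \(\hat{\theta}\) values: the grid minimizer of the MSE lands at the mean, while that of the MAE lands at the median. A minimal sketch, assuming only numpy and the `drinks` data above:

```python
import numpy as np

drinks = np.array([20, 21, 22, 29, 33])

# Evaluate both cost functions over a grid of candidate theta values
thetas = np.linspace(15, 40, 501)
mse = np.array([np.mean((drinks - t) ** 2) for t in thetas])
mae = np.array([np.mean(np.abs(drinks - t)) for t in thetas])

# The MSE is minimized at the mean; the MAE at the median
print(thetas[np.argmin(mse)])  # close to np.mean(drinks) == 25.0
print(thetas[np.argmin(mae)])  # close to np.median(drinks) == 22.0
```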

How do outliers affect each cost function? Imagine we append a large outlying value, 1033, to the dataset. The mean of the data increases substantially, while the median is nearly unaffected.

-
+
drinks_with_outlier = np.append(drinks, 1033)
 display(drinks_with_outlier)
 np.mean(drinks_with_outlier), np.median(drinks_with_outlier)
@@ -780,7 +786,7 @@

This means that under the MSE, the optimal model parameter \(\hat{\theta}\) is strongly affected by the presence of outliers. Under the MAE, the optimal parameter is not as influenced by outlying data. We can generalize this by saying that the MSE is sensitive to outliers, while the MAE is robust to outliers.

Let’s try another experiment. This time, we’ll add an additional, non-outlying datapoint to the data.

-
+
drinks_with_additional_observation = np.append(drinks, 35)
 drinks_with_additional_observation
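A side note on this experiment: with an even number of data points, the MAE no longer has a unique minimizer – its cost curve is flat between the two middle values, so every \(\hat{\theta}\) in that interval achieves the same minimal cost. A small sketch (assuming numpy) with the six-point dataset above:

```python
import numpy as np

drinks_with_additional_observation = np.array([20, 21, 22, 29, 33, 35])

# With an even number of points, the MAE is flat between the two middle
# values -- every theta in [22, 29] achieves the same (minimal) cost
mae = lambda t: np.mean(np.abs(drinks_with_additional_observation - t))
print(mae(22), mae(25.5), mae(29))  # all three are equal
```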
@@ -852,7 +858,7 @@

+
Code
# `corrcoef` computes the correlation coefficient between two variables
@@ -884,7 +890,7 @@ 

and "Length". What is making the raw data deviate from a linear relationship? Notice that the data points with "Length" greater than 2.6 have disproportionately high values of "Age" relative to the rest of the data. If we could manipulate these data points to have lower "Age" values, we’d “shift” these points downwards and reduce the curvature in the data. Applying a logarithmic transformation to \(y_i\) (that is, taking \(\log(\) "Age" \()\) ) would achieve just that.

An important word on \(\log\): in Data 100 (and most upper-division STEM courses), \(\log\) denotes the natural logarithm with base \(e\). The base-10 logarithm, where relevant, is indicated by \(\log_{10}\).
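This convention matches numpy: `np.log` computes the natural (base-\(e\)) logarithm, and base 10 requires `np.log10` explicitly.

```python
import numpy as np

# np.log is the natural logarithm: ln(e) = 1
print(np.log(np.e))
# np.log10 is the base-10 logarithm: log10(100) = 2
print(np.log10(100))
```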

-
+
Code
z = np.log(y)
@@ -919,7 +925,7 @@ 

\[\log{(y)} = \theta_0 + \theta_1 x\] \[y = e^{\theta_0 + \theta_1 x}\] \[y = (e^{\theta_0})e^{\theta_1 x}\] \[y = C e^{kx}\]

for some constants \(C = e^{\theta_0}\) and \(k = \theta_1\).

\(y\) is an exponential function of \(x\). Applying an exponential fit to the untransformed variables corroborates this finding.
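As a sketch of how such a fit can be computed – shown here on synthetic data standing in for the dugongs, since the actual values aren't reproduced in this diff – we fit a line to \((x, \log y)\), then exponentiate the intercept to recover \(C\) while the slope gives \(k\):

```python
import numpy as np

# Synthetic (x, y) pairs following y = 0.1 * e^(2x) with small noise,
# standing in for the dugongs' "Length" and "Age" columns
rng = np.random.default_rng(42)
x = np.linspace(1.5, 2.7, 27)
y = 0.1 * np.exp(2.0 * x) * rng.lognormal(0, 0.05, x.size)

# Fit SLR to (x, log y): log(y) = theta_0 + theta_1 * x
theta_1, theta_0 = np.polyfit(x, np.log(y), deg=1)

C, k = np.exp(theta_0), theta_1  # y ~= C * e^(k x)
print(C, k)  # close to the true values 0.1 and 2.0
```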

-
+
Code
plt.figure(dpi=120, figsize=(4, 3))
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf
index bae7e593c..bc735d3e7 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf
index fcf504066..dec61cece 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf
index b2ec7eb82..5c08b56ec 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf
index aaf5adffd..586ab8a97 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf
index dfdad302a..a083da24b 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf
index 1c5bfb789..0d07f4125 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf differ
diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf
index de3d473cf..e054b4431 100644
Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf differ
diff --git a/docs/eda/eda.html b/docs/eda/eda.html
index abfd86a39..2ce797035 100644
--- a/docs/eda/eda.html
+++ b/docs/eda/eda.html
@@ -255,6 +255,12 @@
   
  12  Ordinary Least Squares
   
+ +
@@ -343,7 +349,7 @@

Data Cleaning and EDA

-
+
Code
import numpy as np
@@ -408,7 +414,7 @@ 

5.1.1.1 CSV

CSVs, which stand for Comma-Separated Values, are a common tabular data format. In the past two pandas lectures, we briefly touched on the idea of file format: the way data is encoded in a file for storage. Specifically, our elections and babynames datasets were stored and loaded as CSVs:

-
+
pd.read_csv("data/elections.csv").head(5)
@@ -479,7 +485,7 @@