diff --git a/docs/cv_regularization/images/constrained_gd.png b/docs/cv_regularization/images/constrained_gd.png
new file mode 100644
index 000000000..4eda732b7
Binary files /dev/null and b/docs/cv_regularization/images/constrained_gd.png differ
diff --git a/docs/cv_regularization/images/cross_validation.png b/docs/cv_regularization/images/cross_validation.png
new file mode 100644
index 000000000..9faee18b6
Binary files /dev/null and b/docs/cv_regularization/images/cross_validation.png differ
diff --git a/docs/cv_regularization/images/diamond.png b/docs/cv_regularization/images/diamond.png
new file mode 100644
index 000000000..cdb03a3b2
Binary files /dev/null and b/docs/cv_regularization/images/diamond.png differ
diff --git a/docs/cv_regularization/images/diamondpoint.png b/docs/cv_regularization/images/diamondpoint.png
new file mode 100644
index 000000000..2d56ec3f4
Binary files /dev/null and b/docs/cv_regularization/images/diamondpoint.png differ
diff --git a/docs/cv_regularization/images/diamondreg.png b/docs/cv_regularization/images/diamondreg.png
new file mode 100644
index 000000000..6bd703484
Binary files /dev/null and b/docs/cv_regularization/images/diamondreg.png differ
diff --git a/docs/cv_regularization/images/green_constrained_gd_sol.png b/docs/cv_regularization/images/green_constrained_gd_sol.png
new file mode 100644
index 000000000..aa481a6f4
Binary files /dev/null and b/docs/cv_regularization/images/green_constrained_gd_sol.png differ
diff --git a/docs/cv_regularization/images/hyperparameter_tuning.png b/docs/cv_regularization/images/hyperparameter_tuning.png
new file mode 100644
index 000000000..fce75441a
Binary files /dev/null and b/docs/cv_regularization/images/hyperparameter_tuning.png differ
diff --git a/docs/cv_regularization/images/largerq.png b/docs/cv_regularization/images/largerq.png
new file mode 100644
index 000000000..b0d2b7979
Binary files /dev/null and b/docs/cv_regularization/images/largerq.png differ
diff --git a/docs/cv_regularization/images/model_selection.png b/docs/cv_regularization/images/model_selection.png
new file mode 100644
index 000000000..219273867
Binary files /dev/null and b/docs/cv_regularization/images/model_selection.png differ
diff --git a/docs/cv_regularization/images/possible_validation_sets.png b/docs/cv_regularization/images/possible_validation_sets.png
new file mode 100644
index 000000000..f41f7d364
Binary files /dev/null and b/docs/cv_regularization/images/possible_validation_sets.png differ
diff --git a/docs/cv_regularization/images/simple_under_overfit.png b/docs/cv_regularization/images/simple_under_overfit.png
new file mode 100644
index 000000000..51bdffdfc
Binary files /dev/null and b/docs/cv_regularization/images/simple_under_overfit.png differ
diff --git a/docs/cv_regularization/images/summary.png b/docs/cv_regularization/images/summary.png
new file mode 100644
index 000000000..59a4ccaf7
Binary files /dev/null and b/docs/cv_regularization/images/summary.png differ
diff --git a/docs/cv_regularization/images/train-test-split.png b/docs/cv_regularization/images/train-test-split.png
new file mode 100644
index 000000000..6c9bfd0bc
Binary files /dev/null and b/docs/cv_regularization/images/train-test-split.png differ
diff --git a/docs/cv_regularization/images/training_validation_curve.png b/docs/cv_regularization/images/training_validation_curve.png
new file mode 100644
index 000000000..0f6fd9aa6
Binary files /dev/null and b/docs/cv_regularization/images/training_validation_curve.png differ
diff --git a/docs/cv_regularization/images/unconstrained.png b/docs/cv_regularization/images/unconstrained.png
new file mode 100644
index 000000000..20ad9e443
Binary files /dev/null and b/docs/cv_regularization/images/unconstrained.png differ
diff --git a/docs/cv_regularization/images/validation-split.png b/docs/cv_regularization/images/validation-split.png
new file mode 100644
index 000000000..5c8aaa3bf
Binary files /dev/null and b/docs/cv_regularization/images/validation-split.png differ
diff --git a/docs/cv_regularization/images/validation_set.png b/docs/cv_regularization/images/validation_set.png
new file mode 100644
index 000000000..7d816e7d6
Binary files /dev/null and b/docs/cv_regularization/images/validation_set.png differ
diff --git a/docs/cv_regularization/images/verylarge.png b/docs/cv_regularization/images/verylarge.png
new file mode 100644
index 000000000..b08a41efe
Binary files /dev/null and b/docs/cv_regularization/images/verylarge.png differ
diff --git a/docs/data100_logo.png b/docs/data100_logo.png
new file mode 100644
index 000000000..b30e64d0e
Binary files /dev/null and b/docs/data100_logo.png differ
diff --git a/eda/eda.html b/docs/eda/eda.html
similarity index 80%
rename from eda/eda.html
rename to docs/eda/eda.html
index 6944abfb7..47fdd83f7 100644
--- a/eda/eda.html
+++ b/docs/eda/eda.html
@@ -686,7 +686,7 @@
force=False)
covid_file # a file path wrapper object
-
Using cached version that was downloaded (UTC): Mon Mar 18 21:13:08 2024
+
Using cached version that was downloaded (UTC): Fri Aug 25 09:57:25 2023
PosixPath('data/confirmed-cases.json')
@@ -718,7 +718,7 @@
!ls -lh {covid_file}!wc -l {covid_file}
-
-rw-r--r-- 1 Ishani staff 114K Mar 18 21:13 data/confirmed-cases.json
+
-rw-r--r-- 1 lillianweng staff 114K Aug 25 2023 data/confirmed-cases.json
1109 data/confirmed-cases.json
@@ -4130,14 +4130,8 @@
sns.displot(co2['Days']);plt.title("Distribution of days feature");# suppresses unneeded plotting output
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
-
+
In terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. In addition, there are nearly 200 missing values–that’s about 27% of the data!
@@ -4147,8 +4141,8 @@
Code
-
sns.scatterplot(x="Yr", y="Days", data=co2);
-plt.title("Day field by Year");# the ; suppresses output
+
sns.scatterplot(x="Yr", y="Days", data=co2);
+plt.title("Day field by Year");# the ; suppresses output
@@ -4172,23 +4166,17 @@
Code
-
# Histograms of average CO2 measurements
-sns.displot(co2['Avg']);
+
# Histograms of average CO2 measurements
+sns.displot(co2['Avg']);
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
-
+
The non-missing values are in the 300-400 range (a regular range of CO2 levels).
We also see that there are only a few missing Avg values (<1% of values). Let’s examine all of them:
-
co2[co2["Avg"] <0]
+
co2[co2["Avg"] <0]
@@ -4297,8 +4285,8 @@
Code
-
sns.lineplot(x='DecDate', y='Avg', data=co2)
-plt.title("CO2 Average By Month");
+
sns.lineplot(x='DecDate', y='Avg', data=co2)
+plt.title("CO2 Average By Month");
@@ -4309,9 +4297,9 @@
-
# 1. Drop missing values
-co2_drop = co2[co2['Avg'] >0]
-co2_drop.head()
+
# 1. Drop missing values
+co2_drop = co2[co2['Avg'] >0]
+co2_drop.head()
@@ -4387,9 +4375,9 @@
-
# 2. Replace NaN with -99.99
-co2_NA = co2.replace(-99.99, np.NaN)
-co2_NA.head()
+
# 2. Replace NaN with -99.99
+co2_NA = co2.replace(-99.99, np.NaN)
+co2_NA.head()
@@ -4473,10 +4461,10 @@
-
# 3. Use interpolated column which estimates missing Avg values
-co2_impute = co2.copy()
-co2_impute['Avg'] = co2['Int']
-co2_impute.head()
+
# 3. Use interpolated column which estimates missing Avg values
+co2_impute = co2.copy()
+co2_impute['Avg'] = co2['Int']
+co2_impute.head()
@@ -4556,30 +4544,30 @@
Code
-
# results of plotting data in 1958
-
-def line_and_points(data, ax, title):
-# assumes single year, hence Mo
- ax.plot('Mo', 'Avg', data=data)
- ax.scatter('Mo', 'Avg', data=data)
- ax.set_xlim(2, 13)
- ax.set_title(title)
- ax.set_xticks(np.arange(3, 13))
-
-def data_year(data, year):
-return data[data["Yr"] ==1958]
-
-# uses matplotlib subplots
-# you may see more next week; focus on output for now
-fig, axes = plt.subplots(ncols =3, figsize=(12, 4), sharey=True)
-
-year =1958
-line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
-line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
-line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
-
-fig.suptitle(f"Monthly Averages for {year}")
-plt.tight_layout()
+
# results of plotting data in 1958
+
+def line_and_points(data, ax, title):
+# assumes single year, hence Mo
+ ax.plot('Mo', 'Avg', data=data)
+ ax.scatter('Mo', 'Avg', data=data)
+ ax.set_xlim(2, 13)
+ ax.set_title(title)
+ ax.set_xticks(np.arange(3, 13))
+
+def data_year(data, year):
+return data[data["Yr"] ==1958]
+
+# uses matplotlib subplots
+# you may see more next week; focus on output for now
+fig, axes = plt.subplots(ncols =3, figsize=(12, 4), sharey=True)
+
+year =1958
+line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
+line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
+line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
+
+fig.suptitle(f"Monthly Averages for {year}")
+plt.tight_layout()
@@ -4595,8 +4583,8 @@
Code
-
sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
-plt.title("CO2 Average By Month, Imputed");
+
sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
+plt.title("CO2 Average By Month, Imputed");
@@ -4623,9 +4611,9 @@
Code
-
co2_year = co2_impute.groupby('Yr').mean()
-sns.lineplot(x='Yr', y='Avg', data=co2_year)
-plt.title("CO2 Average By Year");
+
co2_year = co2_impute.groupby('Yr').mean()
+sns.lineplot(x='Yr', y='Avg', data=co2_year)
+plt.title("CO2 Average By Year");
@@ -4966,1221 +4954,1221 @@
Source Code
-
---
-title: Data Cleaning and EDA
-execute:
- echo: true
-format:
- html:
- code-fold: true
- code-tools: true
- toc: true
- toc-title: Data Cleaning and EDA
- page-layout: full
- theme:
- - cosmo
- - cerulean
- callout-icon: false
-jupyter:
- jupytext:
- text_representation:
- extension: .qmd
- format_name: quarto
- format_version: '1.0'
- jupytext_version: 1.16.1
- kernelspec:
- display_name: Python 3 (ipykernel)
- language: python
- name: python3
----
-
-```{python}
-#| code-fold: true
-import numpy as np
-import pandas as pd
-
-import matplotlib.pyplot as plt
-import seaborn as sns
-#%matplotlib inline
-plt.rcParams['figure.figsize'] = (12, 9)
-
-sns.set()
-sns.set_context('talk')
-np.set_printoptions(threshold=20, precision=2, suppress=True)
-pd.set_option('display.max_rows', 30)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.precision', 2)
-# This option stops scientific notation for pandas
-pd.set_option('display.float_format', '{:.2f}'.format)
-
-# Silence some spurious seaborn warnings
-import warnings
-warnings.filterwarnings("ignore", category=FutureWarning)
-```
-
-::: {.callout-note collapse="false"}
-## Learning Outcomes
-* Recognize common file formats
-* Categorize data by its variable type
-* Build awareness of issues with data faithfulness and develop targeted solutions
-:::
-
-In the past few lectures, we've learned that `pandas` is a toolkit to restructure, modify, and explore a dataset. What we haven't yet touched on is *how* to make these data transformation decisions. When we receive a new set of data from the "real world," how do we know what processing we should do to convert this data into a usable form?
-
-**Data cleaning**, also called **data wrangling**, is the process of transforming raw data to facilitate subsequent analysis. It is often used to address issues like:
-
-* Unclear structure or formatting
-* Missing or corrupted values
-* Unit conversions
-* ...and so on
-
-**Exploratory Data Analysis (EDA)** is the process of understanding a new dataset. It is an open-ended, informal analysis that involves familiarizing ourselves with the variables present in the data, discovering potential hypotheses, and identifying possible issues with the data. This last point can often motivate further data cleaning to address any problems with the dataset's format; because of this, EDA and data cleaning are often thought of as an "infinite loop," with each process driving the other.
-
-In this lecture, we will consider the key properties of data to consider when performing data cleaning and EDA. In doing so, we'll develop a "checklist" of sorts for you to consider when approaching a new dataset. Throughout this process, we'll build a deeper understanding of this early (but very important!) stage of the data science lifecycle.
-
-## Structure
-We often prefer rectangular data for data analysis. Rectangular structures are easy to manipulate and analyze. A key element of data cleaning is about transforming data to be more rectangular.
-
-There are two kinds of rectangular data: tables and matrices. Tables have named columns with different data types and are manipulated using data transformation languages. Matrices contain numeric data of the same type and are manipulated using linear algebra.
-
-### File Formats
-There are many file types for storing structured data: TSV, JSON, XML, ASCII, SAS, etc. We'll only cover CSV, TSV, and JSON in lecture, but you'll likely encounter other formats as you work with different datasets. Reading documentation is your best bet for understanding how to process the multitude of different file types.
-
-#### CSV
-CSVs, which stand for **Comma-Separated Values**, are a common tabular data format.
-In the past two `pandas` lectures, we briefly touched on the idea of file format: the way data is encoded in a file for storage. Specifically, our `elections` and `babynames` datasets were stored and loaded as CSVs:
-
-```{python}
-#| code-fold: false
-pd.read_csv("data/elections.csv").head(5)
-```
-
-To better understand the properties of a CSV, let's take a look at the first few rows of the raw data file to see what it looks like before being loaded into a `DataFrame`. We'll use the `repr()` function to return the raw string with its special characters:
-
-```{python}
-#| code-fold: false
-withopen("data/elections.csv", "r") as table:
- i =0
-for row in table:
-print(repr(row))
- i +=1
-if i >3:
-break
-```
-
-Each row, or **record**, in the data is delimited by a newline `\n`. Each column, or **field**, in the data is delimited by a comma `,` (hence, comma-separated!).
-
-#### TSV
-
-Another common file type is **TSV (Tab-Separated Values)**. In a TSV, records are still delimited by a newline `\n`, while fields are delimited by `\t` tab character.
-
-Let's check out the first few rows of the raw TSV file. Again, we'll use the `repr()` function so that `print` shows the special characters.
-
-```{python}
-#| code-fold: false
-withopen("data/elections.txt", "r") as table:
- i =0
-for row in table:
-print(repr(row))
- i +=1
-if i >3:
-break
-```
-
-TSVs can be loaded into `pandas` using `pd.read_csv`. We'll need to specify the **delimiter** with parameter` sep='\t'`[(documentation)](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html).
-
-```{python}
-#| code-fold: false
-pd.read_csv("data/elections.txt", sep='\t').head(3)
-```
-
-An issue with CSVs and TSVs comes up whenever there are commas or tabs within the records. How does `pandas` differentiate between a comma delimiter vs. a comma within the field itself, for example `8,900`? To remedy this, check out the [`quotechar` parameter](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html).
-
-#### JSON
-**JSON (JavaScript Object Notation)** files behave similarly to Python dictionaries. A raw JSON is shown below.
-
-```{python}
-#| code-fold: false
-withopen("data/elections.json", "r") as table:
- i =0
-for row in table:
-print(row)
- i +=1
-if i >8:
-break
-```
-
-JSON files can be loaded into `pandas` using `pd.read_json`.
-
-```{python}
-#| code-fold: false
-pd.read_json('data/elections.json').head(3)
-```
-
-##### EDA with JSON: Berkeley COVID-19 Data
-The City of Berkeley Open Data [website](https://data.cityofberkeley.info/Health/COVID-19-Confirmed-Cases/xn6j-b766) has a dataset with COVID-19 Confirmed Cases among Berkeley residents by date. Let's download the file and save it as a JSON (note the source URL file type is also a JSON). In the interest of reproducible data science, we will download the data programatically. We have defined some helper functions in the [`ds100_utils.py`](https://ds100.org/fa23/resources/assets/lectures/lec05/lec05-eda.html) file that we can reuse these helper functions in many different notebooks.
-
-```{python}
-#| code-fold: false
-from ds100_utils import fetch_and_cache
-
-covid_file = fetch_and_cache(
-"https://data.cityofberkeley.info/api/views/xn6j-b766/rows.json?accessType=DOWNLOAD",
-"confirmed-cases.json",
- force=False)
-covid_file # a file path wrapper object
-```
-
-###### File Size
-Let's start our analysis by getting a rough estimate of the size of the dataset to inform the tools we use to view the data. For relatively small datasets, we can use a text editor or spreadsheet. For larger datasets, more programmatic exploration or distributed computing tools may be more fitting. Here we will use `Python` tools to probe the file.
-
-Since there seem to be text files, let's investigate the number of lines, which often corresponds to the number of records
-
-```{python}
-#| code-fold: false
-import os
-
-print(covid_file, "is", os.path.getsize(covid_file) /1e6, "MB")
-
-withopen(covid_file, "r") as f:
-print(covid_file, "is", sum(1for l in f), "lines.")
-```
-
-###### Unix Commands
-As part of the EDA workflow, Unix commands can come in very handy. In fact, there's an entire book called ["Data Science at the Command Line"](https://datascienceatthecommandline.com/) that explores this idea in depth!
-In Jupyter/IPython, you can prefix lines with `!` to execute arbitrary Unix commands, and within those lines, you can refer to Python variables and expressions with the syntax `{expr}`.
-
-Here, we use the `ls` command to list files, using the `-lh` flags, which request "long format with information in human-readable form." We also use the `wc` command for "word count," but with the `-l` flag, which asks for line counts instead of words.
-
-These two give us the same information as the code above, albeit in a slightly different form:
-
-```{python}
-#| code-fold: false
-!ls -lh {covid_file}
-!wc -l {covid_file}
-```
-
-###### File Contents
-Let's explore the data format using `Python`.
-
-```{python}
-#| code-fold: false
-withopen(covid_file, "r") as f:
-for i, row inenumerate(f):
-print(repr(row)) # print raw strings
-if i >=4: break
-```
-
-We can use the `head` Unix command (which is where `pandas`' `head` method comes from!) to see the first few lines of the file:
-
-```{python}
-#| code-fold: false
-!head -5 {covid_file}
-```
-
-In order to load the JSON file into `pandas`, Let's first do some EDA with Oython's `json` package to understand the particular structure of this JSON file so that we can decide what (if anything) to load into `pandas`. Python has relatively good support for JSON data since it closely matches the internal python object model. In the following cell we import the entire JSON datafile into a python dictionary using the `json` package.
-
-```{python}
-#| code-fold: false
-import json
-
-withopen(covid_file, "rb") as f:
- covid_json = json.load(f)
-```
-
-The `covid_json` variable is now a dictionary encoding the data in the file:
-
-```{python}
-#| code-fold: false
-type(covid_json)
-```
-
-We can examine what keys are in the top level JSON object by listing out the keys.
-
-```{python}
-#| code-fold: false
-covid_json.keys()
-```
-
-**Observation**: The JSON dictionary contains a `meta` key which likely refers to metadata (data about the data). Metadata is often maintained with the data and can be a good source of additional information.
-
-
-We can investigate the metadata further by examining the keys associated with the metadata.
-
-```{python}
-#| code-fold: false
-covid_json['meta'].keys()
-```
-
-The `meta` key contains another dictionary called `view`. This likely refers to metadata about a particular "view" of some underlying database. We will learn more about views when we study SQL later in the class.
-
-```{python}
-#| code-fold: false
-covid_json['meta']['view'].keys()
-```
-
-Notice that this a nested/recursive data structure. As we dig deeper we reveal more and more keys and the corresponding data:
-
-```
-meta
-|-> data
- | ... (haven't explored yet)
-|-> view
- | -> id
- | -> name
- | -> attribution
- ...
- | -> description
- ...
- | -> columns
- ...
-```
-
-
-There is a key called description in the view sub dictionary. This likely contains a description of the data:
-
-```{python}
-#| code-fold: false
-print(covid_json['meta']['view']['description'])
-```
-
-###### Examining the Data Field for Records
-
-We can look at a few entries in the `data` field. This is what we'll load into `pandas`.
-
-```{python}
-#| code-fold: false
-for i inrange(3):
-print(f"{i:03} | {covid_json['data'][i]}")
-```
-
-Observations:
-* These look like equal-length records, so maybe `data` is a table!
-* But what do each of values in the record mean? Where can we find column headers?
-
-For that, we'll need the `columns` key in the metadata dictionary. This returns a list:
-
-```{python}
-#| code-fold: false
-type(covid_json['meta']['view']['columns'])
-```
-
-###### Summary of exploring the JSON file
-
-1. The above **metadata** tells us a lot about the columns in the data including column names, potential data anomalies, and a basic statistic.
-1. Because of its non-tabular structure, JSON makes it easier (than CSV) to create **self-documenting data**, meaning that information about the data is stored in the same file as the data.
-1. Self-documenting data can be helpful since it maintains its own description and these descriptions are more likely to be updated as data changes.
-
-###### Loading COVID Data into `pandas`
-Finally, let's load the data (not the metadata) into a `pandas``DataFrame`. In the following block of code we:
-
-1. Translate the JSON records into a `DataFrame`:
-
- * fields: `covid_json['meta']['view']['columns']`
- * records: `covid_json['data']`
-
-
-1. Remove columns that have no metadata description. This would be a bad idea in general, but here we remove these columns since the above analysis suggests they are unlikely to contain useful information.
-
-1. Examine the `tail` of the table.
-
-```{python}
-#| code-fold: false
-# Load the data from JSON and assign column titles
-covid = pd.DataFrame(
- covid_json['data'],
- columns=[c['name'] for c in covid_json['meta']['view']['columns']])
-
-covid.tail()
-```
-
-### Primary and Foreign Keys
-
-Last time, we introduced `.merge` as the `pandas` method for joining multiple `DataFrame`s together. In our discussion of joins, we touched on the idea of using a "key" to determine what rows should be merged from each table. Let's take a moment to examine this idea more closely.
-
-The **primary key** is the column or set of columns in a table that *uniquely* determine the values of the remaining columns. It can be thought of as the unique identifier for each individual row in the table. For example, a table of Data 100 students might use each student's Cal ID as the primary key.
-
-```{python}
-#| echo: false
-pd.DataFrame({"Cal ID":[3034619471, 3035619472, 3025619473, 3046789372], \
-"Name":["Oski", "Ollie", "Orrie", "Ollie"], \
-"Major":["Data Science", "Computer Science", "Data Science", "Economics"]})
-```
-
-The **foreign key** is the column or set of columns in a table that reference primary keys in other tables. Knowing a dataset's foreign keys can be useful when assigning the `left_on` and `right_on` parameters of `.merge`. In the table of office hour tickets below, `"Cal ID"` is a foreign key referencing the previous table.
-
-```{python}
-#| echo: false
-pd.DataFrame({"OH Request":[1, 2, 3, 4], \
-"Cal ID":[3034619471, 3035619472, 3025619473, 3035619472], \
-"Question":["HW 2 Q1", "HW 2 Q3", "Lab 3 Q4", "HW 2 Q7"]})
-```
-
-### Variable Types
-
-Variables are columns. A variable is a measurement of a particular concept. Variables have two common properties: data type/storage type and variable type/feature type. The data type of a variable indicates how each variable value is stored in memory (integer, floating point, boolean, etc.) and affects which `pandas` functions are used. The variable type is a conceptualized measurement of information (and therefore indicates what values a variable can take on). Variable type is identified through expert knowledge, exploring the data itself, or consulting the data codebook. The variable type affects how one visualizes and inteprets the data. In this class, "variable types" are conceptual.
-
-After loading data into a file, it's a good idea to take the time to understand what pieces of information are encoded in the dataset. In particular, we want to identify what variable types are present in our data. Broadly speaking, we can categorize variables into one of two overarching types.
-
-**Quantitative variables** describe some numeric quantity or amount. We can divide quantitative data further into:
-
-* **Continuous quantitative variables**: numeric data that can be measured on a continuous scale to arbitrary precision. Continuous variables do not have a strict set of possible values – they can be recorded to any number of decimal places. For example, weights, GPA, or CO<sub>2</sub> concentrations.
-* **Discrete quantitative variables**: numeric data that can only take on a finite set of possible values. For example, someone's age or the number of siblings they have.
-
-**Qualitative variables**, also known as **categorical variables**, describe data that isn't measuring some quantity or amount. The sub-categories of categorical data are:
-
-* **Ordinal qualitative variables**: categories with ordered levels. Specifically, ordinal variables are those where the difference between levels has no consistent, quantifiable meaning. Some examples include levels of education (high school, undergrad, grad, etc.), income bracket (low, medium, high), or Yelp rating.
-* **Nominal qualitative variables**: categories with no specific order. For example, someone's political affiliation or Cal ID number.
-
-![Classification of variable types](images/variable.png)
-
-Note that many variables don't sit neatly in just one of these categories. Qualitative variables could have numeric levels, and conversely, quantitative variables could be stored as strings.
-
-## Granularity, Scope, and Temporality
-
-After understanding the structure of the dataset, the next task is to determine what exactly the data represents. We'll do so by considering the data's granularity, scope, and temporality.
-
-### Granularity
-The **granularity** of a dataset is what a single row represents. You can also think of it as the level of detail included in the data. To determine the data's granularity, ask: what does each row in the dataset represent? Fine-grained data contains a high level of detail, with a single row representing a small individual unit. For example, each record may represent one person. Coarse-grained data is encoded such that a single row represents a large individual unit – for example, each record may represent a group of people.
-
-### Scope
-The **scope** of a dataset is the subset of the population covered by the data. If we were investigating student performance in Data Science courses, a dataset with a narrow scope might encompass all students enrolled in Data 100 whereas a dataset with an expansive scope might encompass all students in California.
-
-### Temporality
-The **temporality** of a dataset describes the periodicity over which the data was collected as well as when the data was most recently collected or updated.
-
-Time and date fields of a dataset could represent a few things:
-
-1. when the "event" happened
-2. when the data was collected, or when it was entered into the system
-3. when the data was copied into the database
-
-To fully understand the temporality of the data, it also may be necessary to standardize time zones or inspect recurring time-based trends in the data (do patterns recur in 24-hour periods? Over the course of a month? Seasonally?). The convention for standardizing time is the Coordinated Universal Time (UTC), an international time standard measured at 0 degrees latitude that stays consistent throughout the year (no daylight savings). We can represent Berkeley's time zone, Pacific Standard Time (PST), as UTC-7 (with daylight savings).
-
-#### Temporality with `pandas`' `dt` accessors
-Let's briefly look at how we can use `pandas`' `dt` accessors to work with dates/times in a dataset using the dataset you'll see in Lab 3: the Berkeley PD Calls for Service dataset.
-
-```{python}
-#| code-fold: true
-calls = pd.read_csv("data/Berkeley_PD_-_Calls_for_Service.csv")
-calls.head()
-```
-
-Looks like there are three columns with dates/times: `EVENTDT`, `EVENTTM`, and `InDbDate`.
-
-Most likely, `EVENTDT` stands for the date when the event took place, `EVENTTM` stands for the time of day the event took place (in 24-hr format), and `InDbDate` is the date this call is recorded onto the database.
-
-If we check the data type of these columns, we will see they are stored as strings. We can convert them to `datetime` objects using pandas `to_datetime` function.
-
-```{python}
-#| code-fold: false
-calls["EVENTDT"] = pd.to_datetime(calls["EVENTDT"])
-calls.head()
-```
-
-Now, we can use the `dt` accessor on this column.
-
-We can get the month:
-
-```{python}
-#| code-fold: false
-calls["EVENTDT"].dt.month.head()
-```
-
-Which day of the week the date is on:
-
-```{python}
-#| code-fold: false
-calls["EVENTDT"].dt.dayofweek.head()
-```
-
-Check the mimimum values to see if there are any suspicious-looking, 70s dates:
-
-```{python}
-#| code-fold: false
-calls.sort_values("EVENTDT").head()
-```
-
-Doesn't look like it! We are good!
-
-
-We can also do many things with the `dt` accessor like switching time zones and converting time back to UNIX/POSIX time. Check out the documentation on [`.dt` accessor](https://pandas.pydata.org/docs/user_guide/basics.html#basics-dt-accessors) and [time series/date functionality](https://pandas.pydata.org/docs/user_guide/timeseries.html#).
-
-## Faithfulness
-
-At this stage in our data cleaning and EDA workflow, we've achieved quite a lot: we've identified how our data is structured, come to terms with what information it encodes, and gained insight as to how it was generated. Throughout this process, we should always recall the original intent of our work in Data Science – to use data to better understand and model the real world. To achieve this goal, we need to ensure that the data we use is faithful to reality; that is, that our data accurately captures the "real world."
-
-Data used in research or industry is often "messy" – there may be errors or inaccuracies that impact the faithfulness of the dataset. Signs that data may not be faithful include:
-
-* Unrealistic or "incorrect" values, such as negative counts, locations that don't exist, or dates set in the future
-* Violations of obvious dependencies, like an age that does not match a birthday
-* Clear signs that data was entered by hand, which can lead to spelling errors or fields that are incorrectly shifted
-* Signs of data falsification, such as fake email addresses or repeated use of the same names
-* Duplicated records or fields containing the same information
-* Truncated data, e.g. Microsoft Excel would limit the number of rows to 655536 and the number of columns to 255
-
-We often solve some of these more common issues in the following ways:
-
-* Spelling errors: apply corrections or drop records that aren't in a dictionary
-* Time zone inconsistencies: convert to a common time zone (e.g. UTC)
-* Duplicated records or fields: identify and eliminate duplicates (using primary keys)
-* Unspecified or inconsistent units: infer the units and check that values are in reasonable ranges in the data
-
-### Missing Values
-Another common issue encountered with real-world datasets is that of missing data. One strategy to resolve this is to simply drop any records with missing values from the dataset. This does, however, introduce the risk of inducing biases – it is possible that the missing or corrupt records may be systemically related to some feature of interest in the data. Another solution is to keep the data as `NaN` values.
-
-A third method to address missing data is to perform **imputation**: infer the missing values using other data available in the dataset. There is a wide variety of imputation techniques that can be implemented; some of the most common are listed below.
-
-* Average imputation: replace missing values with the average value for that field
-* Hot deck imputation: replace missing values with some random value
-* Regression imputation: develop a model to predict missing values and replace with the predicted value from the model.
-* Multiple imputation: replace missing values with multiple random values
-
-Regardless of the strategy used to deal with missing data, we should think carefully about *why* particular records or fields may be missing – this can help inform whether or not the absence of these values is significant or meaningful.
-
-## EDA Demo 1: Tuberculosis in the United States
-
-Now, let's walk through the data-cleaning and EDA workflow to see what can we learn about the presence of Tuberculosis in the United States!
-
-We will examine the data included in the [original CDC article](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w#T1_down) published in 2021.
-
-
-### CSVs and Field Names
-Suppose Table 1 was saved as a CSV file located in `data/cdc_tuberculosis.csv`.
-
-We can then explore the CSV (which is a text file, and does not contain binary-encoded data) in many ways:
-1. Using a text editor like emacs, vim, VSCode, etc.
-2. Opening the CSV directly in DataHub (read-only), Excel, Google Sheets, etc.
-3. The `Python` file object
-4. `pandas`, using `pd.read_csv()`
-
-To try out options 1 and 2, you can view or download the Tuberculosis from the [lecture demo notebook](https://data100.datahub.berkeley.edu/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FDS-100%2Ffa23-student&urlpath=lab%2Ftree%2Ffa23-student%2Flecture%2Flec05%2Flec04-eda.ipynb&branch=main) under the `data` folder in the left hand menu. Notice how the CSV file is a type of **rectangular data (i.e., tabular data) stored as comma-separated values**.
-
-Next, let's try out option 3 using the `Python` file object. We'll look at the first four lines:
-
-```{python}
-#| code-fold: true
-with open("data/cdc_tuberculosis.csv", "r") as f:
-    i = 0
-    for row in f:
-        print(row)
-        i += 1
-        if i > 3:
-            break
-```
-
-Whoa, why are there blank lines interspaced between the lines of the CSV?
-
-You may recall that all line breaks in text files are encoded as the special newline character `\n`. Python's `print()` prints each string (including the newline), and an additional newline on top of that.
-
-If you're curious, we can use the `repr()` function to return the raw string with all special characters:
-
-```{python}
-#| code-fold: true
-with open("data/cdc_tuberculosis.csv", "r") as f:
-    i = 0
-    for row in f:
-        print(repr(row)) # print raw strings
-        i += 1
-        if i > 3:
-            break
-```
-
-Finally, let's try option 4 and use the tried-and-true Data 100 approach: `pandas`.
-
-```{python}
-#| code-fold: false
-tb_df = pd.read_csv("data/cdc_tuberculosis.csv")
-tb_df.head()
-```
-
-You may notice some strange things about this table: what's up with the "Unnamed" column names and the first row?
-
-Congratulations — you're ready to wrangle your data! Because of how things are stored, we'll need to clean the data a bit to name our columns better.
-
-A reasonable first step is to identify the row with the right header. The `pd.read_csv()` function ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)) has the convenient `header` parameter that we can set to use the elements in row 1 as the appropriate columns:
-
-```{python}
-#| code-fold: false
-tb_df = pd.read_csv("data/cdc_tuberculosis.csv", header=1) # row index
-tb_df.head(5)
-```
-
-Wait...but now we can't differentiate between the "Number of TB cases" and "TB incidence" year columns. `pandas` has tried to make our lives easier by automatically adding ".1" to the latter columns, but this doesn't help us, as humans, understand the data.
-
-We can do this manually with `df.rename()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html?highlight=rename#pandas.DataFrame.rename)):
-
-```{python}
-#| code-fold: false
-rename_dict = {'2019': 'TB cases 2019',
-               '2020': 'TB cases 2020',
-               '2021': 'TB cases 2021',
-               '2019.1': 'TB incidence 2019',
-               '2020.1': 'TB incidence 2020',
-               '2021.1': 'TB incidence 2021'}
-tb_df = tb_df.rename(columns=rename_dict)
-tb_df.head(5)
-```
-
-### Record Granularity
-
-You might already be wondering: what's up with that first record?
-
-Row 0 is what we call a **rollup record**, or summary record. It's often useful when displaying tables to humans. The **granularity** of record 0 (Totals) vs the rest of the records (States) is different.
-
-Okay, EDA step two. How was the rollup record aggregated?
-
-Let's check if Total TB cases is the sum of all state TB cases. If we sum over all rows, we should get **2x** the total cases in each of our TB cases by year (why do you think this is?).
-
-```{python}
-#| code-fold: true
-tb_df.sum(axis=0)
-```
-
-Whoa, what's going on with the TB cases in 2019, 2020, and 2021? Check out the column types:
-
-```{python}
-#| code-fold: true
-tb_df.dtypes
-```
-
-Since there are commas in the values for TB cases, the numbers are read as the `object` datatype, or **storage type** (close to the `Python` string datatype), so `pandas` is concatenating strings instead of adding integers (recall that Python can "sum", or concatenate, strings together: `"data" + "100"` evaluates to `"data100"`).
-
-
-Fortunately `read_csv` also has a `thousands` parameter ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)):
-
-```{python}
-#| code-fold: false
-# improve readability: chaining method calls with outer parentheses/line breaks
-tb_df = (
- pd.read_csv("data/cdc_tuberculosis.csv", header=1, thousands=',')
- .rename(columns=rename_dict)
-)
-tb_df.head(5)
-```
-
-```{python}
-#| code-fold: false
-tb_df.sum()
-```
-
-The total TB cases look right. Phew!
-
-Let's just look at the records with **state-level granularity**:
-
-```{python}
-#| code-fold: true
-state_tb_df = tb_df[1:]
-state_tb_df.head(5)
-```
-
-### Gather Census Data
-
-U.S. Census population estimates [source](https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html) (2019), [source](https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html) (2020-2021).
-
-Running the below cells cleans the data.
-There are a few new methods here:
-* `df.convert_dtypes()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.convert_dtypes.html)) conveniently converts all float dtypes into ints and is out of scope for the class.
-* `df.dropna()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html)) will be explained in more detail next time.
-
-```{python}
-#| code-fold: true
-# 2010s census data
-census_2010s_df = pd.read_csv("data/nst-est2019-01.csv", header=3, thousands=",")
-census_2010s_df = (
- census_2010s_df
- .reset_index()
- .drop(columns=["index", "Census", "Estimates Base"])
- .rename(columns={"Unnamed: 0": "Geographic Area"})
- .convert_dtypes() # "smart" converting of columns, use at your own risk
- .dropna() # we'll introduce this next time
-)
-census_2010s_df['Geographic Area'] = census_2010s_df['Geographic Area'].str.strip('.')
-
-# with pd.option_context('display.min_rows', 30): # shows more rows
-# display(census_2010s_df)
-
-census_2010s_df.head(5)
-```
-
-Occasionally, you will want to modify code that you have imported. To reimport those modifications you can either use `python`'s `importlib` library:
-
-```python
-from importlib import reload
-reload(utils)
-```
-
-or use `iPython` magic which will intelligently import code when files change:
-
-```python
-%load_ext autoreload
-%autoreload 2
-```
-
-```{python}
-#| code-fold: true
-# census 2020s data
-census_2020s_df = pd.read_csv("data/NST-EST2022-POP.csv", header=3, thousands=",")
-census_2020s_df = (
- census_2020s_df
- .reset_index()
- .drop(columns=["index", "Unnamed: 1"])
- .rename(columns={"Unnamed: 0": "Geographic Area"})
- .convert_dtypes() # "smart" converting of columns, use at your own risk
- .dropna() # we'll introduce this next time
-)
-census_2020s_df['Geographic Area'] = census_2020s_df['Geographic Area'].str.strip('.')
-
-census_2020s_df.head(5)
-```
-
-### Joining Data (Merging `DataFrame`s)
-
-Time to `merge`! Here we use the `DataFrame` method `df1.merge(right=df2, ...)` on `DataFrame` `df1` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)). Contrast this with the function `pd.merge(left=df1, right=df2, ...)` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.merge.html?highlight=pandas%20merge#pandas.merge)). Feel free to use either.
-
-```{python}
-#| code-fold: false
-# merge TB DataFrame with two US census DataFrames
-tb_census_df = (
- tb_df
- .merge(right=census_2010s_df,
- left_on="U.S. jurisdiction", right_on="Geographic Area")
- .merge(right=census_2020s_df,
- left_on="U.S. jurisdiction", right_on="Geographic Area")
-)
-tb_census_df.head(5)
-```
-
-Having all of these columns is a little unwieldy. We could either drop the unneeded columns now, or just merge on smaller census `DataFrame`s. Let's do the latter.
-
-```{python}
-#| code-fold: false
-# try merging again, but cleaner this time
-tb_census_df = (
- tb_df
- .merge(right=census_2010s_df[["Geographic Area", "2019"]],
- left_on="U.S. jurisdiction", right_on="Geographic Area")
- .drop(columns="Geographic Area")
- .merge(right=census_2020s_df[["Geographic Area", "2020", "2021"]],
- left_on="U.S. jurisdiction", right_on="Geographic Area")
- .drop(columns="Geographic Area")
-)
-tb_census_df.head(5)
-```
-
-### Reproducing Data: Compute Incidence
-
-Let's recompute incidence to make sure we know where the original CDC numbers came from.
-
-From the [CDC report](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w#T1_down): TB incidence is computed as “Cases per 100,000 persons using mid-year population estimates from the U.S. Census Bureau.”
-
-If we define a group as 100,000 people, then we can compute the TB incidence for a given state population as
-
-$$\text{TB incidence} = \frac{\text{TB cases in population}}{\text{groups in population}} = \frac{\text{TB cases in population}}{\text{population}/100000} $$
-
-$$= \frac{\text{TB cases in population}}{\text{population}} \times 100000$$
-
-Let's try this for 2019:
-
-```{python}
-#| code-fold: false
-tb_census_df["recompute incidence 2019"] = tb_census_df["TB cases 2019"]/tb_census_df["2019"]*100000
-tb_census_df.head(5)
-```
-
-Awesome!!!
-
-Let's use a for-loop and Python format strings to compute TB incidence for all years. Python f-strings are just used for the purposes of this demo, but they're handy to know when you explore data beyond this course ([documentation](https://docs.python.org/3/tutorial/inputoutput.html)).
-
-```{python}
-#| code-fold: false
-# recompute incidence for all years
-for year in [2019, 2020, 2021]:
- tb_census_df[f"recompute incidence {year}"] = tb_census_df[f"TB cases {year}"]/tb_census_df[f"{year}"]*100000
-tb_census_df.head(5)
-```
-
-These numbers look pretty close!!! There are a few errors in the hundredths place, particularly in 2021. It may be useful to further explore reasons behind this discrepancy.
-
-```{python}
-#| code-fold: false
-tb_census_df.describe()
-```
-
-### Bonus EDA: Reproducing the Reported Statistic
-
-
-**How do we reproduce that reported statistic in the original [CDC report](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w)?**
-
-> Reported TB incidence (cases per 100,000 persons) increased **9.4%**, from **2.2** during 2020 to **2.4** during 2021 but was lower than incidence during 2019 (2.7). Increases occurred among both U.S.-born and non–U.S.-born persons.
-
-This is TB incidence computed across the entire U.S. population! How do we reproduce this?
-* We need to reproduce the "Total" TB incidences in our rolled record.
-* But our current `tb_census_df` only has 51 entries (50 states plus Washington, D.C.). There is no rolled record.
-* What happened...?
-
-Let's get exploring!
-
-Before we keep exploring, we'll set all indexes to more meaningful values, instead of just numbers that pertain to some row at some point. This will make our cleaning slightly easier.
-
-```{python}
-#| code-fold: true
-tb_df = tb_df.set_index("U.S. jurisdiction")
-tb_df.head(5)
-```
-
-```{python}
-#| code-fold: false
-census_2010s_df = census_2010s_df.set_index("Geographic Area")
-census_2010s_df.head(5)
-```
-
-```{python}
-#| code-fold: false
-census_2020s_df = census_2020s_df.set_index("Geographic Area")
-census_2020s_df.head(5)
-```
-
-It turns out that our merge above only kept state records, even though our original `tb_df` had the "Total" rolled record:
-
-```{python}
-#| code-fold: false
-tb_df.head()
-```
-
-Recall that `merge` by default does an **inner** merge, meaning that it only preserves keys that are present in **both** `DataFrame`s.
-
-The rolled records in our census `DataFrame` have different `Geographic Area` fields, which was the key we merged on:
-
-```{python}
-#| code-fold: false
-census_2010s_df.head(5)
-```
-
-The Census `DataFrame` has several rolled records. The aggregate record we are looking for actually has the Geographic Area named "United States".
-
-One straightforward way to get the right merge is to rename the value itself. Because we now have the Geographic Area index, we'll use `df.rename()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html)):
-
-```{python}
-#| code-fold: false
-# rename rolled record for 2010s
-census_2010s_df.rename(index={'United States':'Total'}, inplace=True)
-census_2010s_df.head(5)
-```
-
-```{python}
-#| code-fold: false
-# same, but for 2020s rename rolled record
-census_2020s_df.rename(index={'United States':'Total'}, inplace=True)
-census_2020s_df.head(5)
-```
-
-<br/>
-
-Next let's rerun our merge. Note the different chaining, because we are now merging on indexes (`df.merge()`[documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)).
-
-```{python}
-#| code-fold: false
-tb_census_df = (
- tb_df
- .merge(right=census_2010s_df[["2019"]],
- left_index=True, right_index=True)
- .merge(right=census_2020s_df[["2020", "2021"]],
- left_index=True, right_index=True)
-)
-tb_census_df.head(5)
-```
-
-<br/>
-
-Finally, let's recompute our incidences:
-
-```{python}
-#| code-fold: false
-# recompute incidence for all years
-for year in [2019, 2020, 2021]:
- tb_census_df[f"recompute incidence {year}"] = tb_census_df[f"TB cases {year}"]/tb_census_df[f"{year}"]*100000
-tb_census_df.head(5)
-```
-
-We reproduced the total U.S. incidences correctly!
-
-We're almost there. Let's revisit the quote:
-
-> Reported TB incidence (cases per 100,000 persons) increased **9.4%**, from **2.2** during 2020 to **2.4** during 2021 but was lower than incidence during 2019 (2.7). Increases occurred among both U.S.-born and non–U.S.-born persons.
-
-Recall that percent change from $A$ to $B$ is computed as
-$\text{percent change} = \frac{B - A}{A} \times 100$.
-
-```{python}
-#| code-fold: false
-incidence_2020 = tb_census_df.loc['Total', 'recompute incidence 2020']
-incidence_2020
-```
-
-```{python}
-#| code-fold: false
-incidence_2021 = tb_census_df.loc['Total', 'recompute incidence 2021']
-incidence_2021
-```
-
-```{python}
-#| code-fold: false
-difference = (incidence_2021 - incidence_2020)/incidence_2020 *100
-difference
-```
-
-## EDA Demo 2: Mauna Loa CO<sub>2</sub> Data -- A Lesson in Data Faithfulness
-
-[Mauna Loa Observatory](https://gml.noaa.gov/ccgg/trends/data.html) has been monitoring CO<sub>2</sub> concentrations since 1958.
-
-```{python}
-#| code-fold: false
-co2_file = "data/co2_mm_mlo.txt"
-```
-
-Let's do some **EDA**!!
-
-### Reading this file into `Pandas`?
-Let's instead check out this `.txt` file. Some questions to keep in mind: Do we trust this file extension? What structure is it?
-
-Lines 71-78 (inclusive) are shown below:
-
- line number | file contents
-
- 71 | # decimal average interpolated trend #days
- 72 | # date (season corr)
- 73 | 1958 3 1958.208 315.71 315.71 314.62 -1
- 74 | 1958 4 1958.292 317.45 317.45 315.29 -1
- 75 | 1958 5 1958.375 317.50 317.50 314.71 -1
- 76 | 1958 6 1958.458 -99.99 317.10 314.85 -1
- 77 | 1958 7 1958.542 315.86 315.86 314.98 -1
- 78 | 1958 8 1958.625 314.93 314.93 315.94 -1
-
-
-Notice how:
-
-- The values are separated by white space, possibly tabs.
-- The data line up down the rows. For example, the month appears in 7th to 8th position of each line.
-- The 71st and 72nd lines in the file contain column headings split over two lines.
-
-We can use `read_csv` to read the data into a `pandas` `DataFrame`, and we provide several arguments to specify that the separators are white space, there is no header (**we will set our own column names**), and to skip the first 72 rows of the file.
-
-```{python}
-#| code-fold: false
-co2 = pd.read_csv(
-    co2_file, header=None, skiprows=72,
-    sep=r'\s+'  # delimiter for continuous whitespace (stay tuned for regex next lecture)
-)
-co2.head()
-```
-
-Congratulations! You've wrangled the data!
-
-<br/>
-
-...But our columns aren't named.
-**We need to do more EDA.**
-
-### Exploring Variable Feature Types
-
-The NOAA [webpage](https://gml.noaa.gov/ccgg/trends/) might have some useful tidbits (in this case it doesn't).
-
-Using this information, we'll rerun `pd.read_csv`, but this time with some **custom column names.**
-
-```{python}
-#| code-fold: false
-co2 = pd.read_csv(
-    co2_file, header=None, skiprows=72,
-    sep=r'\s+',  # regex for continuous whitespace (next lecture)
-    names=['Yr', 'Mo', 'DecDate', 'Avg', 'Int', 'Trend', 'Days']
-)
-co2.head()
-```
-
-### Visualizing CO<sub>2</sub>
-Scientific studies tend to have very clean data, right...? Let's jump right in and make a time series plot of CO<sub>2</sub> monthly averages.
-
-```{python}
-#| code-fold: true
-sns.lineplot(x='DecDate', y='Avg', data=co2);
-```
-
-The code above uses the `seaborn` plotting library (abbreviated `sns`). We will cover this in the Visualization lecture, but now you don't need to worry about how it works!
-
-Yikes! Plotting the data uncovered a problem. The sharp vertical lines suggest that we have some **missing values**. What happened here?
-
-```{python}
-#| code-fold: false
-co2.head()
-```
-
-```{python}
-#| code-fold: false
-co2.tail()
-```
-
-Some data have unusual values like -1 and -99.99.
-
-Let's check the description at the top of the file again.
-
-* -1 signifies a missing value for the number of days `Days` the equipment was in operation that month.
-* -99.99 denotes a missing monthly average `Avg`
-
-How can we fix this? First, let's explore other aspects of our data. Understanding our data will help us decide what to do with the missing values.
-
-<br/>
-
-
-### Sanity Checks: Reasoning about the data
-First, we consider the shape of the data. How many rows should we have?
-
-* If the data is in chronological order, we should have one record per month.
-* Data from March 1958 to August 2019.
-* We should have $ 12 \times (2019-1957) - 2 - 4 = 738 $ records.
-
-```{python}
-#| code-fold: false
-co2.shape
-```
-
-Nice!! The number of rows (i.e. records) match our expectations.
-
-
-Let's now check the quality of each feature.
-
-### Understanding Missing Value 1: `Days`
-`Days` is a time field, so let's analyze other time fields to see if there is an explanation for missing values of days of operation.
-
-Let's start with **months**, `Mo`.
-
-Are we missing any records? The number of months should have 62 or 61 instances (March 1958-August 2019).
-
-```{python}
-#| code-fold: false
-co2["Mo"].value_counts().sort_index()
-```
-
-As expected Jan, Feb, Sep, Oct, Nov, and Dec have 61 occurrences and the rest 62.
-
-<br/>
-
-Next let's explore **days** `Days` itself, which is the number of days that the measurement equipment worked.
-
-```{python}
-#| code-fold: true
-sns.displot(co2['Days']);
-plt.title("Distribution of days feature");  # suppresses unneeded plotting output
-```
-
-In terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. In addition, there are nearly 200 missing values--**that's about 27% of the data**!
-
-<br/>
-
-Finally, let's check the last time feature, **year** `Yr`.
-
-Let's check to see if there is any connection between missing-ness and the year of the recording.
-
-```{python}
-#| code-fold: true
-sns.scatterplot(x="Yr", y="Days", data=co2);
-plt.title("Day field by Year");  # the ; suppresses output
-```
-
-**Observations**:
-
-* All of the missing data are in the early years of operation.
-* It appears there may have been problems with equipment in the mid to late 80s.
-
-**Potential Next Steps**:
-
-* Confirm these explanations through documentation about the historical readings.
-* Maybe drop the earliest recordings? However, we would want to delay such action until after we have examined the time trends and assess whether there are any potential problems.
-
-<br/>
-
-### Understanding Missing Value 2: `Avg`
-Next, let's return to the -99.99 values in `Avg` to analyze the overall quality of the CO<sub>2</sub> measurements. We'll plot a histogram of the average CO<sub>2</sub> measurements
-
-```{python}
-#| code-fold: true
-# Histograms of average CO2 measurements
-sns.displot(co2['Avg']);
-```
-
-The non-missing values are in the 300-400 range (a regular range of CO<sub>2</sub> levels).
-
-We also see that there are only a few missing `Avg` values (**<1% of values**). Let's examine all of them:
-
-```{python}
-#| code-fold: false
-co2[co2["Avg"] < 0]
-```
-
-There doesn't seem to be a pattern to these values, other than that most records also were missing `Days` data.
-
-### Drop, `NaN`, or Impute Missing `Avg` Data?
-
-How should we address the invalid `Avg` data?
-
-1. Drop records
-2. Set to NaN
-3. Impute using some strategy
-
-Remember we want to fix the following plot:
-
-```{python}
-#| code-fold: true
-sns.lineplot(x='DecDate', y='Avg', data=co2)
-plt.title("CO2 Average By Month");
-```
-
-Since we are plotting `Avg` vs `DecDate`, we should just focus on dealing with missing values for `Avg`.
-
-
-Let's consider a few options:
-1. Drop those records
-2. Replace -99.99 with NaN
-3. Substitute it with a likely value for the average CO<sub>2</sub>?
-
-What do you think are the pros and cons of each possible action?
-
-Let's examine each of these three options.
-
-```{python}
-#| code-fold: false
-# 1. Drop missing values
-co2_drop = co2[co2['Avg'] > 0]
-co2_drop.head()
-```
-
-```{python}
-#| code-fold: false
-# 2. Replace -99.99 with NaN
-co2_NA = co2.replace(-99.99, np.NaN)
-co2_NA.head()
-```
-
-We'll also use a third version of the data.
-
-First, we note that the dataset already comes with a **substitute value** for the -99.99.
-
-From the file description:
-
-> The `interpolated` column includes average values from the preceding column (`average`)
-and **interpolated values** where data are missing. Interpolated values are
-computed in two steps...
-
-The `Int` feature has values that exactly match those in `Avg`, except when `Avg` is -99.99, and then a **reasonable** estimate is used instead.
-
-So, the third version of our data will use the `Int` feature instead of `Avg`.
-
-```{python}
-#| code-fold: false
-# 3. Use interpolated column which estimates missing Avg values
-co2_impute = co2.copy()
-co2_impute['Avg'] = co2['Int']
-co2_impute.head()
-```
-
-What's a **reasonable** estimate?
-
-To answer this question, let's zoom in on a short time period, say the measurements in 1958 (where we know we have two missing values).
-
-```{python}
-#| code-fold: true
-# results of plotting data in 1958
-
-def line_and_points(data, ax, title):
-    # assumes single year, hence Mo
-    ax.plot('Mo', 'Avg', data=data)
-    ax.scatter('Mo', 'Avg', data=data)
-    ax.set_xlim(2, 13)
-    ax.set_title(title)
-    ax.set_xticks(np.arange(3, 13))
-
-def data_year(data, year):
-    return data[data["Yr"] == year]
-
-# uses matplotlib subplots
-# you may see more next week; focus on output for now
-fig, axes = plt.subplots(ncols=3, figsize=(12, 4), sharey=True)
-
-year = 1958
-line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
-line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
-line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
-
-fig.suptitle(f"Monthly Averages for {year}")
-plt.tight_layout()
-```
-
-In the big picture since there are only 7 `Avg` values missing (**<1%** of 738 months), any of these approaches would work.
-
-However there is some appeal to **option 3, Imputing**:
-
-* Shows seasonal trends for CO<sub>2</sub>
-* We are plotting all months in our data as a line plot
-
-
-Let's replot our original figure with option 3:
-
-```{python}
-#| code-fold: true
-sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
-plt.title("CO2 Average By Month, Imputed");
-```
-
-Looks pretty close to what we see on the NOAA [website](https://gml.noaa.gov/ccgg/trends/)!
-
-### Presenting the Data: A Discussion on Data Granularity
-
-From the description:
-
-* Monthly measurements are averages of average day measurements.
-* The NOAA GML website has datasets for daily/hourly measurements too.
-
-The data you present depends on your research question.
-
-**How do CO<sub>2</sub> levels vary by season?**
-
-* You might want to keep average monthly data.
-
-**Are CO<sub>2</sub> levels rising over the past 50+ years, consistent with global warming predictions?**
-
-* You might be happier with a **coarser granularity** of average year data!
-
-```{python}
-#| code-fold: true
-co2_year = co2_impute.groupby('Yr').mean()
-sns.lineplot(x='Yr', y='Avg', data=co2_year)
-plt.title("CO2 Average By Year");
-```
-
-Indeed, we see a rise by nearly 100 ppm of CO<sub>2</sub> since Mauna Loa began recording in 1958.
-
-## Summary
-We went over a lot of content this lecture; let's summarize the most important points:
-
-### Dealing with Missing Values
-There are a few options we can take to deal with missing data:
-
-* Drop missing records
-* Keep `NaN` missing values
-* Impute using an interpolated column
-
-### EDA and Data Wrangling
-There are several ways to approach EDA and Data Wrangling:
-
-* Examine the **data and metadata**: what is the date, size, organization, and structure of the data?
-* Examine each **field/attribute/dimension** individually.
-* Examine pairs of related dimensions (e.g. breaking down grades by major).
-* Along the way, we can:
- * **Visualize** or summarize the data.
- * **Validate assumptions** about data and its collection process. Pay particular attention to when the data was collected.
- * Identify and **address anomalies**.
- * Apply data transformations and corrections (we'll cover this in the upcoming lecture).
- * **Record everything you do!** Developing in Jupyter Notebook promotes *reproducibility* of your own work!
+
---
+title: Data Cleaning and EDA
+execute:
+ echo: true
+format:
+ html:
+ code-fold: true
+ code-tools: true
+ toc: true
+ toc-title: Data Cleaning and EDA
+ page-layout: full
+ theme:
+ - cosmo
+ - cerulean
+ callout-icon: false
+jupyter:
+ jupytext:
+ text_representation:
+ extension: .qmd
+ format_name: quarto
+ format_version: '1.0'
+ jupytext_version: 1.16.1
+ kernelspec:
+ display_name: Python 3 (ipykernel)
+ language: python
+ name: python3
+---
+
+```{python}
+#| code-fold: true
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+#%matplotlib inline
+plt.rcParams['figure.figsize'] = (12, 9)
+
+sns.set()
+sns.set_context('talk')
+np.set_printoptions(threshold=20, precision=2, suppress=True)
+pd.set_option('display.max_rows', 30)
+pd.set_option('display.max_columns', None)
+pd.set_option('display.precision', 2)
+# This option stops scientific notation for pandas
+pd.set_option('display.float_format', '{:.2f}'.format)
+
+# Silence some spurious seaborn warnings
+import warnings
+warnings.filterwarnings("ignore", category=FutureWarning)
+```
+
+::: {.callout-note collapse="false"}
+## Learning Outcomes
+* Recognize common file formats
+* Categorize data by its variable type
+* Build awareness of issues with data faithfulness and develop targeted solutions
+:::
+
+In the past few lectures, we've learned that `pandas` is a toolkit to restructure, modify, and explore a dataset. What we haven't yet touched on is *how* to make these data transformation decisions. When we receive a new set of data from the "real world," how do we know what processing we should do to convert this data into a usable form?
+
+**Data cleaning**, also called **data wrangling**, is the process of transforming raw data to facilitate subsequent analysis. It is often used to address issues like:
+
+* Unclear structure or formatting
+* Missing or corrupted values
+* Unit conversions
+* ...and so on
+
+**Exploratory Data Analysis (EDA)** is the process of understanding a new dataset. It is an open-ended, informal analysis that involves familiarizing ourselves with the variables present in the data, discovering potential hypotheses, and identifying possible issues with the data. This last point can often motivate further data cleaning to address any problems with the dataset's format; because of this, EDA and data cleaning are often thought of as an "infinite loop," with each process driving the other.
+
+In this lecture, we will consider the key properties of data to consider when performing data cleaning and EDA. In doing so, we'll develop a "checklist" of sorts for you to consider when approaching a new dataset. Throughout this process, we'll build a deeper understanding of this early (but very important!) stage of the data science lifecycle.
+
+## Structure
+We often prefer rectangular data for data analysis. Rectangular structures are easy to manipulate and analyze. A key element of data cleaning is about transforming data to be more rectangular.
+
+There are two kinds of rectangular data: tables and matrices. Tables have named columns with different data types and are manipulated using data transformation languages. Matrices contain numeric data of the same type and are manipulated using linear algebra.
+
+### File Formats
+There are many file types for storing structured data: TSV, JSON, XML, ASCII, SAS, etc. We'll only cover CSV, TSV, and JSON in lecture, but you'll likely encounter other formats as you work with different datasets. Reading documentation is your best bet for understanding how to process the multitude of different file types.
+
+#### CSV
+CSVs, which stand for **Comma-Separated Values**, are a common tabular data format.
+In the past two `pandas` lectures, we briefly touched on the idea of file format: the way data is encoded in a file for storage. Specifically, our `elections` and `babynames` datasets were stored and loaded as CSVs:
+
+```{python}
+#| code-fold: false
+pd.read_csv("data/elections.csv").head(5)
+```
+
+To better understand the properties of a CSV, let's take a look at the first few rows of the raw data file to see what it looks like before being loaded into a `DataFrame`. We'll use the `repr()` function to return the raw string with its special characters:
+
+```{python}
+#| code-fold: false
+withopen("data/elections.csv", "r") as table:
+ i =0
+for row in table:
+print(repr(row))
+ i +=1
+if i >3:
+break
+```
+
+Each row, or **record**, in the data is delimited by a newline `\n`. Each column, or **field**, in the data is delimited by a comma `,` (hence, comma-separated!).
+
+#### TSV
+
+Another common file type is **TSV (Tab-Separated Values)**. In a TSV, records are still delimited by a newline `\n`, while fields are delimited by `\t` tab character.
+
+Let's check out the first few rows of the raw TSV file. Again, we'll use the `repr()` function so that `print` shows the special characters.
+
+```{python}
+#| code-fold: false
+withopen("data/elections.txt", "r") as table:
+ i =0
+for row in table:
+print(repr(row))
+ i +=1
+if i >3:
+break
+```
+
+TSVs can be loaded into `pandas` using `pd.read_csv`. We'll need to specify the **delimiter** with the parameter `sep='\t'` [(documentation)](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html).
+
+```{python}
+#| code-fold: false
+pd.read_csv("data/elections.txt", sep='\t').head(3)
+```
+
+An issue with CSVs and TSVs comes up whenever there are commas or tabs within the records. How does `pandas` differentiate between a comma delimiter vs. a comma within the field itself, for example `8,900`? To remedy this, check out the [`quotechar` parameter](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html).
+
+#### JSON
+**JSON (JavaScript Object Notation)** files behave similarly to Python dictionaries. A raw JSON is shown below.
+
+```{python}
+#| code-fold: false
+withopen("data/elections.json", "r") as table:
+ i =0
+for row in table:
+print(row)
+ i +=1
+if i >8:
+break
+```
+
+JSON files can be loaded into `pandas` using `pd.read_json`.
+
+```{python}
+#| code-fold: false
+pd.read_json('data/elections.json').head(3)
+```
+
+##### EDA with JSON: Berkeley COVID-19 Data
+The City of Berkeley Open Data [website](https://data.cityofberkeley.info/Health/COVID-19-Confirmed-Cases/xn6j-b766) has a dataset with COVID-19 Confirmed Cases among Berkeley residents by date. Let's download the file and save it as a JSON (note the source URL file type is also a JSON). In the interest of reproducible data science, we will download the data programmatically. We have defined some helper functions in the [`ds100_utils.py`](https://ds100.org/fa23/resources/assets/lectures/lec05/lec05-eda.html) file so that we can reuse them in many different notebooks.
+
+```{python}
+#| code-fold: false
+from ds100_utils import fetch_and_cache
+
+covid_file = fetch_and_cache(
+"https://data.cityofberkeley.info/api/views/xn6j-b766/rows.json?accessType=DOWNLOAD",
+"confirmed-cases.json",
+ force=False)
+covid_file # a file path wrapper object
+```
+
+###### File Size
+Let's start our analysis by getting a rough estimate of the size of the dataset to inform the tools we use to view the data. For relatively small datasets, we can use a text editor or spreadsheet. For larger datasets, more programmatic exploration or distributed computing tools may be more fitting. Here we will use `Python` tools to probe the file.
+
+Since this appears to be a text file, let's investigate the number of lines, which often corresponds to the number of records.
+
+```{python}
+#| code-fold: false
+import os
+
+print(covid_file, "is", os.path.getsize(covid_file) /1e6, "MB")
+
+withopen(covid_file, "r") as f:
+print(covid_file, "is", sum(1for l in f), "lines.")
+```
+
+###### Unix Commands
+As part of the EDA workflow, Unix commands can come in very handy. In fact, there's an entire book called ["Data Science at the Command Line"](https://datascienceatthecommandline.com/) that explores this idea in depth!
+In Jupyter/IPython, you can prefix lines with `!` to execute arbitrary Unix commands, and within those lines, you can refer to Python variables and expressions with the syntax `{expr}`.
+
+Here, we use the `ls` command to list files, using the `-lh` flags, which request "long format with information in human-readable form." We also use the `wc` command for "word count," but with the `-l` flag, which asks for line counts instead of words.
+
+These two give us the same information as the code above, albeit in a slightly different form:
+
+```{python}
+#| code-fold: false
+!ls -lh {covid_file}
+!wc -l {covid_file}
+```
+
+###### File Contents
+Let's explore the data format using `Python`.
+
+```{python}
+#| code-fold: false
+withopen(covid_file, "r") as f:
+for i, row inenumerate(f):
+print(repr(row)) # print raw strings
+if i >=4: break
+```
+
+We can use the `head` Unix command (which is where `pandas`' `head` method comes from!) to see the first few lines of the file:
+
+```{python}
+#| code-fold: false
+!head -5 {covid_file}
+```
+
+In order to load the JSON file into `pandas`, let's first do some EDA with Python's `json` package to understand the particular structure of this JSON file so that we can decide what (if anything) to load into `pandas`. Python has relatively good support for JSON data since it closely matches the internal Python object model. In the following cell we import the entire JSON datafile into a Python dictionary using the `json` package.
+
+```{python}
+#| code-fold: false
+import json
+
+withopen(covid_file, "rb") as f:
+ covid_json = json.load(f)
+```
+
+The `covid_json` variable is now a dictionary encoding the data in the file:
+
+```{python}
+#| code-fold: false
+type(covid_json)
+```
+
+We can examine what keys are in the top level JSON object by listing out the keys.
+
+```{python}
+#| code-fold: false
+covid_json.keys()
+```
+
+**Observation**: The JSON dictionary contains a `meta` key which likely refers to metadata (data about the data). Metadata is often maintained with the data and can be a good source of additional information.
+
+
+We can investigate the metadata further by examining the keys associated with the metadata.
+
+```{python}
+#| code-fold: false
+covid_json['meta'].keys()
+```
+
+The `meta` key contains another dictionary called `view`. This likely refers to metadata about a particular "view" of some underlying database. We will learn more about views when we study SQL later in the class.
+
+```{python}
+#| code-fold: false
+covid_json['meta']['view'].keys()
+```
+
+Notice that this is a nested/recursive data structure. As we dig deeper we reveal more and more keys and the corresponding data:
+
+```
+meta
+|-> data
+ | ... (haven't explored yet)
+|-> view
+ | -> id
+ | -> name
+ | -> attribution
+ ...
+ | -> description
+ ...
+ | -> columns
+ ...
+```
+
+
+There is a key called description in the view sub dictionary. This likely contains a description of the data:
+
+```{python}
+#| code-fold: false
+print(covid_json['meta']['view']['description'])
+```
+
+###### Examining the Data Field for Records
+
+We can look at a few entries in the `data` field. This is what we'll load into `pandas`.
+
+```{python}
+#| code-fold: false
+for i inrange(3):
+print(f"{i:03} | {covid_json['data'][i]}")
+```
+
+Observations:
+* These look like equal-length records, so maybe `data` is a table!
+* But what do each of values in the record mean? Where can we find column headers?
+
+For that, we'll need the `columns` key in the metadata dictionary. This returns a list:
+
+```{python}
+#| code-fold: false
+type(covid_json['meta']['view']['columns'])
+```
+
+###### Summary of exploring the JSON file
+
+1. The above **metadata** tells us a lot about the columns in the data including column names, potential data anomalies, and a basic statistic.
+1. Because of its non-tabular structure, JSON makes it easier (than CSV) to create **self-documenting data**, meaning that information about the data is stored in the same file as the data.
+1. Self-documenting data can be helpful since it maintains its own description and these descriptions are more likely to be updated as data changes.
+
+###### Loading COVID Data into `pandas`
+Finally, let's load the data (not the metadata) into a `pandas` `DataFrame`. In the following block of code we:
+
+1. Translate the JSON records into a `DataFrame`:
+
+ * fields: `covid_json['meta']['view']['columns']`
+ * records: `covid_json['data']`
+
+
+1. Remove columns that have no metadata description. This would be a bad idea in general, but here we remove these columns since the above analysis suggests they are unlikely to contain useful information.
+
+1. Examine the `tail` of the table.
+
+```{python}
+#| code-fold: false
+# Load the data from JSON and assign column titles
+covid = pd.DataFrame(
+ covid_json['data'],
+ columns=[c['name'] for c in covid_json['meta']['view']['columns']])
+
+covid.tail()
+```
+
+### Primary and Foreign Keys
+
+Last time, we introduced `.merge` as the `pandas` method for joining multiple `DataFrame`s together. In our discussion of joins, we touched on the idea of using a "key" to determine what rows should be merged from each table. Let's take a moment to examine this idea more closely.
+
+The **primary key** is the column or set of columns in a table that *uniquely* determine the values of the remaining columns. It can be thought of as the unique identifier for each individual row in the table. For example, a table of Data 100 students might use each student's Cal ID as the primary key.
+
+```{python}
+#| echo: false
+pd.DataFrame({"Cal ID":[3034619471, 3035619472, 3025619473, 3046789372], \
+"Name":["Oski", "Ollie", "Orrie", "Ollie"], \
+"Major":["Data Science", "Computer Science", "Data Science", "Economics"]})
+```
+
+The **foreign key** is the column or set of columns in a table that reference primary keys in other tables. Knowing a dataset's foreign keys can be useful when assigning the `left_on` and `right_on` parameters of `.merge`. In the table of office hour tickets below, `"Cal ID"` is a foreign key referencing the previous table.
+
+```{python}
+#| echo: false
+pd.DataFrame({"OH Request":[1, 2, 3, 4], \
+"Cal ID":[3034619471, 3035619472, 3025619473, 3035619472], \
+"Question":["HW 2 Q1", "HW 2 Q3", "Lab 3 Q4", "HW 2 Q7"]})
+```
+
+### Variable Types
+
+Variables are columns. A variable is a measurement of a particular concept. Variables have two common properties: data type/storage type and variable type/feature type. The data type of a variable indicates how each variable value is stored in memory (integer, floating point, boolean, etc.) and affects which `pandas` functions are used. The variable type is a conceptualized measurement of information (and therefore indicates what values a variable can take on). Variable type is identified through expert knowledge, exploring the data itself, or consulting the data codebook. The variable type affects how one visualizes and interprets the data. In this class, "variable types" are conceptual.
+
+After loading data into a file, it's a good idea to take the time to understand what pieces of information are encoded in the dataset. In particular, we want to identify what variable types are present in our data. Broadly speaking, we can categorize variables into one of two overarching types.
+
+**Quantitative variables** describe some numeric quantity or amount. We can divide quantitative data further into:
+
+* **Continuous quantitative variables**: numeric data that can be measured on a continuous scale to arbitrary precision. Continuous variables do not have a strict set of possible values – they can be recorded to any number of decimal places. For example, weights, GPA, or CO<sub>2</sub> concentrations.
+* **Discrete quantitative variables**: numeric data that can only take on a finite set of possible values. For example, someone's age or the number of siblings they have.
+
+**Qualitative variables**, also known as **categorical variables**, describe data that isn't measuring some quantity or amount. The sub-categories of categorical data are:
+
+* **Ordinal qualitative variables**: categories with ordered levels. Specifically, ordinal variables are those where the difference between levels has no consistent, quantifiable meaning. Some examples include levels of education (high school, undergrad, grad, etc.), income bracket (low, medium, high), or Yelp rating.
+* **Nominal qualitative variables**: categories with no specific order. For example, someone's political affiliation or Cal ID number.
+
+![Classification of variable types](images/variable.png)
+
+Note that many variables don't sit neatly in just one of these categories. Qualitative variables could have numeric levels, and conversely, quantitative variables could be stored as strings.
+
+## Granularity, Scope, and Temporality
+
+After understanding the structure of the dataset, the next task is to determine what exactly the data represents. We'll do so by considering the data's granularity, scope, and temporality.
+
+### Granularity
+The **granularity** of a dataset is what a single row represents. You can also think of it as the level of detail included in the data. To determine the data's granularity, ask: what does each row in the dataset represent? Fine-grained data contains a high level of detail, with a single row representing a small individual unit. For example, each record may represent one person. Coarse-grained data is encoded such that a single row represents a large individual unit – for example, each record may represent a group of people.
+
+### Scope
+The **scope** of a dataset is the subset of the population covered by the data. If we were investigating student performance in Data Science courses, a dataset with a narrow scope might encompass all students enrolled in Data 100 whereas a dataset with an expansive scope might encompass all students in California.
+
+### Temporality
+The **temporality** of a dataset describes the periodicity over which the data was collected as well as when the data was most recently collected or updated.
+
+Time and date fields of a dataset could represent a few things:
+
+1. when the "event" happened
+2. when the data was collected, or when it was entered into the system
+3. when the data was copied into the database
+
+To fully understand the temporality of the data, it also may be necessary to standardize time zones or inspect recurring time-based trends in the data (do patterns recur in 24-hour periods? Over the course of a month? Seasonally?). The convention for standardizing time is the Coordinated Universal Time (UTC), an international time standard measured at 0 degrees latitude that stays consistent throughout the year (no daylight savings). We can represent Berkeley's time zone, Pacific Standard Time (PST), as UTC-8; during daylight saving time, Pacific Daylight Time (PDT) is UTC-7.
+
+#### Temporality with `pandas`' `dt` accessors
+Let's briefly look at how we can use `pandas`' `dt` accessors to work with dates/times in a dataset using the dataset you'll see in Lab 3: the Berkeley PD Calls for Service dataset.
+
+```{python}
+#| code-fold: true
+calls = pd.read_csv("data/Berkeley_PD_-_Calls_for_Service.csv")
+calls.head()
+```
+
+Looks like there are three columns with dates/times: `EVENTDT`, `EVENTTM`, and `InDbDate`.
+
+Most likely, `EVENTDT` stands for the date when the event took place, `EVENTTM` stands for the time of day the event took place (in 24-hr format), and `InDbDate` is the date this call is recorded onto the database.
+
+If we check the data type of these columns, we will see they are stored as strings. We can convert them to `datetime` objects using pandas `to_datetime` function.
+
+```{python}
+#| code-fold: false
+calls["EVENTDT"] = pd.to_datetime(calls["EVENTDT"])
+calls.head()
+```
+
+Now, we can use the `dt` accessor on this column.
+
+We can get the month:
+
+```{python}
+#| code-fold: false
+calls["EVENTDT"].dt.month.head()
+```
+
+Which day of the week the date is on:
+
+```{python}
+#| code-fold: false
+calls["EVENTDT"].dt.dayofweek.head()
+```
+
+Check the minimum values to see if there are any suspicious-looking dates (e.g. from the 1970s):
+
+```{python}
+#| code-fold: false
+calls.sort_values("EVENTDT").head()
+```
+
+Doesn't look like it! We are good!
+
+
+We can also do many things with the `dt` accessor like switching time zones and converting time back to UNIX/POSIX time. Check out the documentation on [`.dt` accessor](https://pandas.pydata.org/docs/user_guide/basics.html#basics-dt-accessors) and [time series/date functionality](https://pandas.pydata.org/docs/user_guide/timeseries.html#).
+
+## Faithfulness
+
+At this stage in our data cleaning and EDA workflow, we've achieved quite a lot: we've identified how our data is structured, come to terms with what information it encodes, and gained insight as to how it was generated. Throughout this process, we should always recall the original intent of our work in Data Science – to use data to better understand and model the real world. To achieve this goal, we need to ensure that the data we use is faithful to reality; that is, that our data accurately captures the "real world."
+
+Data used in research or industry is often "messy" – there may be errors or inaccuracies that impact the faithfulness of the dataset. Signs that data may not be faithful include:
+
+* Unrealistic or "incorrect" values, such as negative counts, locations that don't exist, or dates set in the future
+* Violations of obvious dependencies, like an age that does not match a birthday
+* Clear signs that data was entered by hand, which can lead to spelling errors or fields that are incorrectly shifted
+* Signs of data falsification, such as fake email addresses or repeated use of the same names
+* Duplicated records or fields containing the same information
+* Truncated data, e.g. older versions of Microsoft Excel limited spreadsheets to 65,536 rows and 256 columns
+
+We often solve some of these more common issues in the following ways:
+
+* Spelling errors: apply corrections or drop records that aren't in a dictionary
+* Time zone inconsistencies: convert to a common time zone (e.g. UTC)
+* Duplicated records or fields: identify and eliminate duplicates (using primary keys)
+* Unspecified or inconsistent units: infer the units and check that values are in reasonable ranges in the data
+
+### Missing Values
+Another common issue encountered with real-world datasets is that of missing data. One strategy to resolve this is to simply drop any records with missing values from the dataset. This does, however, introduce the risk of inducing biases – it is possible that the missing or corrupt records may be systemically related to some feature of interest in the data. Another solution is to keep the data as `NaN` values.
+
+A third method to address missing data is to perform **imputation**: infer the missing values using other data available in the dataset. There is a wide variety of imputation techniques that can be implemented; some of the most common are listed below.
+
+* Average imputation: replace missing values with the average value for that field
+* Hot deck imputation: replace missing values with some random value
+* Regression imputation: develop a model to predict missing values and replace with the predicted value from the model.
+* Multiple imputation: replace missing values with multiple random values
+
+Regardless of the strategy used to deal with missing data, we should think carefully about *why* particular records or fields may be missing – this can help inform whether or not the absence of these values is significant or meaningful.
+
+## EDA Demo 1: Tuberculosis in the United States
+
+Now, let's walk through the data-cleaning and EDA workflow to see what can we learn about the presence of Tuberculosis in the United States!
+
+We will examine the data included in the [original CDC article](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w#T1_down) published in 2021.
+
+
+### CSVs and Field Names
+Suppose Table 1 was saved as a CSV file located in `data/cdc_tuberculosis.csv`.
+
+We can then explore the CSV (which is a text file, and does not contain binary-encoded data) in many ways:
+1. Using a text editor like emacs, vim, VSCode, etc.
+2. Opening the CSV directly in DataHub (read-only), Excel, Google Sheets, etc.
+3. The `Python` file object
+4. `pandas`, using `pd.read_csv()`
+
+To try out options 1 and 2, you can view or download the Tuberculosis from the [lecture demo notebook](https://data100.datahub.berkeley.edu/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2FDS-100%2Ffa23-student&urlpath=lab%2Ftree%2Ffa23-student%2Flecture%2Flec05%2Flec04-eda.ipynb&branch=main) under the `data` folder in the left hand menu. Notice how the CSV file is a type of **rectangular data (i.e., tabular data) stored as comma-separated values**.
+
+Next, let's try out option 3 using the `Python` file object. We'll look at the first four lines:
+
+```{python}
+#| code-fold: true
+withopen("data/cdc_tuberculosis.csv", "r") as f:
+ i =0
+for row in f:
+print(row)
+ i +=1
+if i >3:
+break
+```
+
+Whoa, why are there blank lines interspaced between the lines of the CSV?
+
+You may recall that all line breaks in text files are encoded as the special newline character `\n`. Python's `print()` prints each string (including the newline), and an additional newline on top of that.
+
+If you're curious, we can use the `repr()` function to return the raw string with all special characters:
+
+```{python}
+#| code-fold: true
+withopen("data/cdc_tuberculosis.csv", "r") as f:
+ i =0
+for row in f:
+print(repr(row)) # print raw strings
+ i +=1
+if i >3:
+break
+```
+
+Finally, let's try option 4 and use the tried-and-true Data 100 approach: `pandas`.
+
+```{python}
+#| code-fold: false
+tb_df = pd.read_csv("data/cdc_tuberculosis.csv")
+tb_df.head()
+```
+
+You may notice some strange things about this table: what's up with the "Unnamed" column names and the first row?
+
+Congratulations — you're ready to wrangle your data! Because of how things are stored, we'll need to clean the data a bit to name our columns better.
+
+A reasonable first step is to identify the row with the right header. The `pd.read_csv()` function ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)) has the convenient `header` parameter that we can set to use the elements in row 1 as the appropriate columns:
+
+```{python}
+#| code-fold: false
+tb_df = pd.read_csv("data/cdc_tuberculosis.csv", header=1) # row index
+tb_df.head(5)
+```
+
+Wait...but now we can't differentiate between the "Number of TB cases" and "TB incidence" year columns. `pandas` has tried to make our lives easier by automatically adding ".1" to the latter columns, but this doesn't help us, as humans, understand the data.
+
+We can do this manually with `df.rename()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html?highlight=rename#pandas.DataFrame.rename)):
+
+```{python}
+#| code-fold: false
+rename_dict = {'2019': 'TB cases 2019',
+'2020': 'TB cases 2020',
+'2021': 'TB cases 2021',
+'2019.1': 'TB incidence 2019',
+'2020.1': 'TB incidence 2020',
+'2021.1': 'TB incidence 2021'}
+tb_df = tb_df.rename(columns=rename_dict)
+tb_df.head(5)
+```
+
+### Record Granularity
+
+You might already be wondering: what's up with that first record?
+
+Row 0 is what we call a **rollup record**, or summary record. It's often useful when displaying tables to humans. The **granularity** of record 0 (Totals) vs the rest of the records (States) is different.
+
+Okay, EDA step two. How was the rollup record aggregated?
+
+Let's check if Total TB cases is the sum of all state TB cases. If we sum over all rows, we should get **2x** the total cases in each of our TB cases by year (why do you think this is?).
+
+```{python}
+#| code-fold: true
+tb_df.sum(axis=0)
+```
+
+Whoa, what's going on with the TB cases in 2019, 2020, and 2021? Check out the column types:
+
+```{python}
+#| code-fold: true
+tb_df.dtypes
+```
+
+Since there are commas in the values for TB cases, the numbers are read as the `object` datatype, or **storage type** (close to the `Python` string datatype), so `pandas` is concatenating strings instead of adding integers (recall that Python can "sum", or concatenate, strings together: `"data" + "100"` evaluates to `"data100"`).
+
+
+Fortunately `read_csv` also has a `thousands` parameter ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)):
+
+```{python}
+#| code-fold: false
+# improve readability: chaining method calls with outer parentheses/line breaks
+tb_df = (
+ pd.read_csv("data/cdc_tuberculosis.csv", header=1, thousands=',')
+ .rename(columns=rename_dict)
+)
+tb_df.head(5)
+```
+
+```{python}
+#| code-fold: false
+tb_df.sum()
+```
+
+The total TB cases look right. Phew!
+
+Let's just look at the records with **state-level granularity**:
+
+```{python}
+#| code-fold: true
+state_tb_df = tb_df[1:]
+state_tb_df.head(5)
+```
+
+### Gather Census Data
+
+U.S. Census population estimates [source](https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html) (2019), [source](https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html) (2020-2021).
+
+Running the below cells cleans the data.
+There are a few new methods here:
+* `df.convert_dtypes()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.convert_dtypes.html)) conveniently converts all float dtypes into ints and is out of scope for the class.
+* `df.drop_na()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html)) will be explained in more detail next time.
+
+```{python}
+#| code-fold: true
+# 2010s census data
+census_2010s_df = pd.read_csv("data/nst-est2019-01.csv", header=3, thousands=",")
+census_2010s_df = (
+ census_2010s_df
+ .reset_index()
+ .drop(columns=["index", "Census", "Estimates Base"])
+ .rename(columns={"Unnamed: 0": "Geographic Area"})
+ .convert_dtypes() # "smart" converting of columns, use at your own risk
+ .dropna() # we'll introduce this next time
+)
+census_2010s_df['Geographic Area'] = census_2010s_df['Geographic Area'].str.strip('.')
+
+# with pd.option_context('display.min_rows', 30): # shows more rows
+# display(census_2010s_df)
+
+census_2010s_df.head(5)
+```
+
+Occasionally, you will want to modify code that you have imported. To reimport those modifications you can either use Python's `importlib` library:
+
+```python
+from importlib importreload
+reload(utils)
+```
+
+or use `iPython` magic which will intelligently import code when files change:
+
+```python
+%load_ext autoreload
+%autoreload 2
+```
+
+```{python}
+#| code-fold: true
+# census 2020s data
+census_2020s_df = pd.read_csv("data/NST-EST2022-POP.csv", header=3, thousands=",")
+census_2020s_df = (
+ census_2020s_df
+ .reset_index()
+ .drop(columns=["index", "Unnamed: 1"])
+ .rename(columns={"Unnamed: 0": "Geographic Area"})
+ .convert_dtypes() # "smart" converting of columns, use at your own risk
+ .dropna() # we'll introduce this next time
+)
+census_2020s_df['Geographic Area'] = census_2020s_df['Geographic Area'].str.strip('.')
+
+census_2020s_df.head(5)
+```
+
+### Joining Data (Merging `DataFrame`s)
+
+Time to `merge`! Here we use the `DataFrame` method `df1.merge(right=df2, ...)` on `DataFrame` `df1` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)). Contrast this with the function `pd.merge(left=df1, right=df2, ...)` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.merge.html?highlight=pandas%20merge#pandas.merge)). Feel free to use either.
+
+```{python}
+#| code-fold: false
+# merge TB DataFrame with two US census DataFrames
+tb_census_df = (
+ tb_df
+ .merge(right=census_2010s_df,
+ left_on="U.S. jurisdiction", right_on="Geographic Area")
+ .merge(right=census_2020s_df,
+ left_on="U.S. jurisdiction", right_on="Geographic Area")
+)
+tb_census_df.head(5)
+```
+
+Having all of these columns is a little unwieldy. We could either drop the unneeded columns now, or just merge on smaller census `DataFrame`s. Let's do the latter.
+
+```{python}
+#| code-fold: false
+# try merging again, but cleaner this time
+tb_census_df = (
+ tb_df
+ .merge(right=census_2010s_df[["Geographic Area", "2019"]],
+ left_on="U.S. jurisdiction", right_on="Geographic Area")
+ .drop(columns="Geographic Area")
+ .merge(right=census_2020s_df[["Geographic Area", "2020", "2021"]],
+ left_on="U.S. jurisdiction", right_on="Geographic Area")
+ .drop(columns="Geographic Area")
+)
+tb_census_df.head(5)
+```
+
+### Reproducing Data: Compute Incidence
+
+Let's recompute incidence to make sure we know where the original CDC numbers came from.
+
+From the [CDC report](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w#T1_down): TB incidence is computed as “Cases per 100,000 persons using mid-year population estimates from the U.S. Census Bureau.”
+
+If we define a group as 100,000 people, then we can compute the TB incidence for a given state population as
+
+$$\text{TB incidence} = \frac{\text{TB cases in population}}{\text{groups in population}} = \frac{\text{TB cases in population}}{\text{population}/100000} $$
+
+$$= \frac{\text{TB cases in population}}{\text{population}} \times 100000$$
+
+Let's try this for 2019:
+
+```{python}
+#| code-fold: false
+tb_census_df["recompute incidence 2019"] = tb_census_df["TB cases 2019"]/tb_census_df["2019"]*100000
+tb_census_df.head(5)
+```
+
+Awesome!!!
+
+Let's use a for-loop and Python format strings to compute TB incidence for all years. Python f-strings are just used for the purposes of this demo, but they're handy to know when you explore data beyond this course ([documentation](https://docs.python.org/3/tutorial/inputoutput.html)).
+
+```{python}
+#| code-fold: false
+# recompute incidence for all years
+for year in [2019, 2020, 2021]:
+ tb_census_df[f"recompute incidence {year}"] = tb_census_df[f"TB cases {year}"]/tb_census_df[f"{year}"]*100000
+tb_census_df.head(5)
+```
+
+These numbers look pretty close!!! There are a few errors in the hundredths place, particularly in 2021. It may be useful to further explore reasons behind this discrepancy.
+
+```{python}
+#| code-fold: false
+tb_census_df.describe()
+```
+
+### Bonus EDA: Reproducing the Reported Statistic
+
+
+**How do we reproduce that reported statistic in the original [CDC report](https://www.cdc.gov/mmwr/volumes/71/wr/mm7112a1.htm?s_cid=mm7112a1_w)?**
+
+> Reported TB incidence (cases per 100,000 persons) increased **9.4%**, from **2.2** during 2020 to **2.4** during 2021 but was lower than incidence during 2019 (2.7). Increases occurred among both U.S.-born and non–U.S.-born persons.
+
+This is TB incidence computed across the entire U.S. population! How do we reproduce this?
+* We need to reproduce the "Total" TB incidences in our rolled record.
+* But our current `tb_census_df` only has 51 entries (50 states plus Washington, D.C.). There is no rolled record.
+* What happened...?
+
+Let's get exploring!
+
+Before we keep exploring, we'll set all indexes to more meaningful values, instead of just numbers that pertain to some row at some point. This will make our cleaning slightly easier.
+
+```{python}
+#| code-fold: true
+tb_df = tb_df.set_index("U.S. jurisdiction")
+tb_df.head(5)
+```
+
+```{python}
+#| code-fold: false
+census_2010s_df = census_2010s_df.set_index("Geographic Area")
+census_2010s_df.head(5)
+```
+
+```{python}
+#| code-fold: false
+census_2020s_df = census_2020s_df.set_index("Geographic Area")
+census_2020s_df.head(5)
+```
+
+It turns out that our merge above only kept state records, even though our original `tb_df` had the "Total" rolled record:
+
+```{python}
+#| code-fold: false
+tb_df.head()
+```
+
+Recall that `merge` does an **inner** merge by default, meaning that it only preserves keys that are present in **both** `DataFrame`s.
+
+The rolled records in our census `DataFrame` have different `Geographic Area` fields, which was the key we merged on:
+
+```{python}
+#| code-fold: false
+census_2010s_df.head(5)
+```
+
+The Census `DataFrame` has several rolled records. The aggregate record we are looking for actually has the Geographic Area named "United States".
+
+One straightforward way to get the right merge is to rename the value itself. Because we now have the Geographic Area index, we'll use `df.rename()` ([documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html)):
+
+```{python}
+#| code-fold: false
+# rename rolled record for 2010s
+census_2010s_df.rename(index={'United States':'Total'}, inplace=True)
+census_2010s_df.head(5)
+```
+
+```{python}
+#| code-fold: false
+# same, but for 2020s rename rolled record
+census_2020s_df.rename(index={'United States':'Total'}, inplace=True)
+census_2020s_df.head(5)
+```
+
+<br/>
+
+Next let's rerun our merge. Note the different chaining, because we are now merging on indexes (`df.merge()` [documentation](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html)).
+
+```{python}
+#| code-fold: false
+tb_census_df = (
+ tb_df
+ .merge(right=census_2010s_df[["2019"]],
+ left_index=True, right_index=True)
+ .merge(right=census_2020s_df[["2020", "2021"]],
+ left_index=True, right_index=True)
+)
+tb_census_df.head(5)
+```
+
+<br/>
+
+Finally, let's recompute our incidences:
+
+```{python}
+#| code-fold: false
+# recompute incidence for all years
+for year in [2019, 2020, 2021]:
+ tb_census_df[f"recompute incidence {year}"] = tb_census_df[f"TB cases {year}"]/tb_census_df[f"{year}"]*100000
+tb_census_df.head(5)
+```
+
+We reproduced the total U.S. incidences correctly!
+
+We're almost there. Let's revisit the quote:
+
+> Reported TB incidence (cases per 100,000 persons) increased **9.4%**, from **2.2** during 2020 to **2.4** during 2021 but was lower than incidence during 2019 (2.7). Increases occurred among both U.S.-born and non–U.S.-born persons.
+
+Recall that percent change from $A$ to $B$ is computed as
+$\text{percent change} = \frac{B - A}{A} \times 100$.
+
+```{python}
+#| code-fold: false
+incidence_2020 = tb_census_df.loc['Total', 'recompute incidence 2020']
+incidence_2020
+```
+
+```{python}
+#| code-fold: false
+incidence_2021 = tb_census_df.loc['Total', 'recompute incidence 2021']
+incidence_2021
+```
+
+```{python}
+#| code-fold: false
+difference = (incidence_2021 - incidence_2020)/incidence_2020 *100
+difference
+```
+
+## EDA Demo 2: Mauna Loa CO<sub>2</sub> Data -- A Lesson in Data Faithfulness
+
+[Mauna Loa Observatory](https://gml.noaa.gov/ccgg/trends/data.html) has been monitoring CO<sub>2</sub> concentrations since 1958.
+
+```{python}
+#| code-fold: false
+co2_file ="data/co2_mm_mlo.txt"
+```
+
+Let's do some **EDA**!!
+
+### Reading this file into `Pandas`?
+Let's instead check out this `.txt` file. Some questions to keep in mind: Do we trust this file extension? What structure is it?
+
+Lines 71-78 (inclusive) are shown below:
+
+ line number | file contents
+
+ 71 | # decimal average interpolated trend #days
+ 72 | # date (season corr)
+ 73 | 1958 3 1958.208 315.71 315.71 314.62 -1
+ 74 | 1958 4 1958.292 317.45 317.45 315.29 -1
+ 75 | 1958 5 1958.375 317.50 317.50 314.71 -1
+ 76 | 1958 6 1958.458 -99.99 317.10 314.85 -1
+ 77 | 1958 7 1958.542 315.86 315.86 314.98 -1
+ 78 | 1958 8 1958.625 314.93 314.93 315.94 -1
+
+
+Notice how:
+
+- The values are separated by white space, possibly tabs.
+- The data line up down the rows. For example, the month appears in the 7th to 8th position of each line.
+- The 71st and 72nd lines in the file contain column headings split over two lines.
+
+We can use `read_csv` to read the data into a `pandas` `DataFrame`, and we provide several arguments to specify that the separators are white space, there is no header (**we will set our own column names**), and to skip the first 72 rows of the file.
+
+```{python}
+#| code-fold: false
+co2 = pd.read_csv(
+ co2_file, header =None, skiprows =72,
+ sep =r'\s+'#delimiter for continuous whitespace (stay tuned for regex next lecture))
+)
+co2.head()
+```
+
+Congratulations! You've wrangled the data!
+
+<br/>
+
+...But our columns aren't named.
+**We need to do more EDA.**
+
+### Exploring Variable Feature Types
+
+The NOAA [webpage](https://gml.noaa.gov/ccgg/trends/) might have some useful tidbits (in this case it doesn't).
+
+Using this information, we'll rerun `pd.read_csv`, but this time with some **custom column names.**
+
+```{python}
+#| code-fold: false
+co2 = pd.read_csv(
+ co2_file, header =None, skiprows =72,
+ sep ='\s+', #regex for continuous whitespace (next lecture)
+ names = ['Yr', 'Mo', 'DecDate', 'Avg', 'Int', 'Trend', 'Days']
+)
+co2.head()
+```
+
+### Visualizing CO<sub>2</sub>
+Scientific studies tend to have very clean data, right...? Let's jump right in and make a time series plot of CO<sub>2</sub> monthly averages.
+
+```{python}
+#| code-fold: true
+sns.lineplot(x='DecDate', y='Avg', data=co2);
+```
+
+The code above uses the `seaborn` plotting library (abbreviated `sns`). We will cover this in the Visualization lecture, but now you don't need to worry about how it works!
+
+Yikes! Plotting the data uncovered a problem. The sharp vertical lines suggest that we have some **missing values**. What happened here?
+
+```{python}
+#| code-fold: false
+co2.head()
+```
+
+```{python}
+#| code-fold: false
+co2.tail()
+```
+
+Some data have unusual values like -1 and -99.99.
+
+Let's check the description at the top of the file again.
+
+* -1 signifies a missing value for the number of days `Days` the equipment was in operation that month.
+* -99.99 denotes a missing monthly average `Avg`
+
+How can we fix this? First, let's explore other aspects of our data. Understanding our data will help us decide what to do with the missing values.
+
+<br/>
+
+
+### Sanity Checks: Reasoning about the data
+First, we consider the shape of the data. How many rows should we have?
+
+* If chronological order, we should have one record per month.
+* Data from March 1958 to August 2019.
+* We should have $ 12 \times (2019-1957) - 2 - 4 = 738 $ records.
+
+```{python}
+#| code-fold: false
+co2.shape
+```
+
+Nice!! The number of rows (i.e. records) matches our expectations.
+
+
+Let's now check the quality of each feature.
+
+### Understanding Missing Value 1: `Days`
+`Days` is a time field, so let's analyze other time fields to see if there is an explanation for missing values of days of operation.
+
+Let's start with **months**, `Mo`.
+
+Are we missing any records? Each month should appear 61 or 62 times (March 1958-August 2019).
+
+```{python}
+#| code-fold: false
+co2["Mo"].value_counts().sort_index()
+```
+
+As expected Jan, Feb, Sep, Oct, Nov, and Dec have 61 occurrences and the rest 62.
+
+<br/>
+
+Next let's explore **days** `Days` itself, which is the number of days that the measurement equipment worked.
+
+```{python}
+#| code-fold: true
+sns.displot(co2['Days']);
+plt.title("Distribution of days feature");# suppresses unneeded plotting output
+```
+
+In terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. In addition, there are nearly 200 missing values--**that's about 27% of the data**!
+
+<br/>
+
+Finally, let's check the last time feature, **year** `Yr`.
+
+Let's check to see if there is any connection between missing-ness and the year of the recording.
+
+```{python}
+#| code-fold: true
+sns.scatterplot(x="Yr", y="Days", data=co2);
+plt.title("Day field by Year");# the ; suppresses output
+```
+
+**Observations**:
+
+* All of the missing data are in the early years of operation.
+* It appears there may have been problems with equipment in the mid to late 80s.
+
+**Potential Next Steps**:
+
+* Confirm these explanations through documentation about the historical readings.
+* Maybe drop the earliest recordings? However, we would want to delay such action until after we have examined the time trends and assess whether there are any potential problems.
+
+<br/>
+
+### Understanding Missing Value 2: `Avg`
+Next, let's return to the -99.99 values in `Avg` to analyze the overall quality of the CO<sub>2</sub> measurements. We'll plot a histogram of the average CO<sub>2</sub> measurements
+
+```{python}
+#| code-fold: true
+# Histograms of average CO2 measurements
+sns.displot(co2['Avg']);
+```
+
+The non-missing values are in the 300-400 range (a regular range of CO<sub>2</sub> levels).
+
+We also see that there are only a few missing `Avg` values (**<1% of values**). Let's examine all of them:
+
+```{python}
+#| code-fold: false
+co2[co2["Avg"] <0]
+```
+
+There doesn't seem to be a pattern to these values, other than that most records also were missing `Days` data.
+
+### Drop, `NaN`, or Impute Missing `Avg` Data?
+
+How should we address the invalid `Avg` data?
+
+1. Drop records
+2. Set to NaN
+3. Impute using some strategy
+
+Remember we want to fix the following plot:
+
+```{python}
+#| code-fold: true
+sns.lineplot(x='DecDate', y='Avg', data=co2)
+plt.title("CO2 Average By Month");
+```
+
+Since we are plotting `Avg` vs `DecDate`, we should just focus on dealing with missing values for `Avg`.
+
+
+Let's consider a few options:
+1. Drop those records
+2. Replace -99.99 with NaN
+3. Substitute it with a likely value for the average CO<sub>2</sub>?
+
+What do you think are the pros and cons of each possible action?
+
+Let's examine each of these three options.
+
+```{python}
+#| code-fold: false
+# 1. Drop missing values
+co2_drop = co2[co2['Avg'] >0]
+co2_drop.head()
+```
+
+```{python}
+#| code-fold: false
+# 2. Replace -99.99 with NaN
+co2_NA = co2.replace(-99.99, np.NaN)
+co2_NA.head()
+```
+
+We'll also use a third version of the data.
+
+First, we note that the dataset already comes with a **substitute value** for the -99.99.
+
+From the file description:
+
+> The `interpolated` column includes average values from the preceding column (`average`)
+and **interpolated values** where data are missing. Interpolated values are
+computed in two steps...
+
+The `Int` feature has values that exactly match those in `Avg`, except when `Avg` is -99.99, and then a **reasonable** estimate is used instead.
+
+So, the third version of our data will use the `Int` feature instead of `Avg`.
+
+```{python}
+#| code-fold: false
+# 3. Use interpolated column which estimates missing Avg values
+co2_impute = co2.copy()
+co2_impute['Avg'] = co2['Int']
+co2_impute.head()
+```
+
+What's a **reasonable** estimate?
+
+To answer this question, let's zoom in on a short time period, say the measurements in 1958 (where we know we have two missing values).
+
+```{python}
+#| code-fold: true
+# results of plotting data in 1958
+
+def line_and_points(data, ax, title):
+# assumes single year, hence Mo
+ ax.plot('Mo', 'Avg', data=data)
+ ax.scatter('Mo', 'Avg', data=data)
+ ax.set_xlim(2, 13)
+ ax.set_title(title)
+ ax.set_xticks(np.arange(3, 13))
+
+def data_year(data, year):
+return data[data["Yr"] ==1958]
+
+# uses matplotlib subplots
+# you may see more next week; focus on output for now
+fig, axes = plt.subplots(ncols =3, figsize=(12, 4), sharey=True)
+
+year =1958
+line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
+line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
+line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
+
+fig.suptitle(f"Monthly Averages for {year}")
+plt.tight_layout()
+```
+
+In the big picture since there are only 7 `Avg` values missing (**<1%** of 738 months), any of these approaches would work.
+
+However, there is some appeal to **option 3, Imputing**:
+
+* Shows seasonal trends for CO<sub>2</sub>
+* We are plotting all months in our data as a line plot
+
+
+Let's replot our original figure with option 3:
+
+```{python}
+#| code-fold: true
+sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
+plt.title("CO2 Average By Month, Imputed");
+```
+
+Looks pretty close to what we see on the NOAA [website](https://gml.noaa.gov/ccgg/trends/)!
+
+### Presenting the Data: A Discussion on Data Granularity
+
+From the description:
+
+* Monthly measurements are averages of average day measurements.
+* The NOAA GML website has datasets for daily/hourly measurements too.
+
+The data you present depends on your research question.
+
+**How do CO<sub>2</sub> levels vary by season?**
+
+* You might want to keep average monthly data.
+
+**Are CO<sub>2</sub> levels rising over the past 50+ years, consistent with global warming predictions?**
+
+* You might be happier with a **coarser granularity** of average year data!
+
+```{python}
+#| code-fold: true
+co2_year = co2_impute.groupby('Yr').mean()
+sns.lineplot(x='Yr', y='Avg', data=co2_year)
+plt.title("CO2 Average By Year");
+```
+
+Indeed, we see a rise by nearly 100 ppm of CO<sub>2</sub> since Mauna Loa began recording in 1958.
+
+## Summary
+We went over a lot of content this lecture; let's summarize the most important points:
+
+### Dealing with Missing Values
+There are a few options we can take to deal with missing data:
+
+* Drop missing records
+* Keep `NaN` missing values
+* Impute using an interpolated column
+
+### EDA and Data Wrangling
+There are several ways to approach EDA and Data Wrangling:
+
+* Examine the **data and metadata**: what is the date, size, organization, and structure of the data?
+* Examine each **field/attribute/dimension** individually.
+* Examine pairs of related dimensions (e.g. breaking down grades by major).
+* Along the way, we can:
+ * **Visualize** or summarize the data.
+ * **Validate assumptions** about data and its collection process. Pay particular attention to when the data was collected.
+ * Identify and **address anomalies**.
+ * Apply data transformations and corrections (we'll cover this in the upcoming lecture).
+ * **Record everything you do!** Developing in Jupyter Notebook promotes *reproducibility* of your own work!
diff --git a/docs/eda/eda_files/figure-html/cell-62-output-1.png b/docs/eda/eda_files/figure-html/cell-62-output-1.png
new file mode 100644
index 000000000..f392d5f92
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-62-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-67-output-1.png b/docs/eda/eda_files/figure-html/cell-67-output-1.png
new file mode 100644
index 000000000..be96b8c94
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-67-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-68-output-1.png b/docs/eda/eda_files/figure-html/cell-68-output-1.png
new file mode 100644
index 000000000..ffd29ff8f
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-68-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-69-output-1.png b/docs/eda/eda_files/figure-html/cell-69-output-1.png
new file mode 100644
index 000000000..290889288
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-69-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-71-output-1.png b/docs/eda/eda_files/figure-html/cell-71-output-1.png
new file mode 100644
index 000000000..49ef3d6a6
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-71-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-75-output-1.png b/docs/eda/eda_files/figure-html/cell-75-output-1.png
new file mode 100644
index 000000000..15a5fe82d
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-75-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-76-output-1.png b/docs/eda/eda_files/figure-html/cell-76-output-1.png
new file mode 100644
index 000000000..40b1fc714
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-76-output-1.png differ
diff --git a/docs/eda/eda_files/figure-html/cell-77-output-1.png b/docs/eda/eda_files/figure-html/cell-77-output-1.png
new file mode 100644
index 000000000..99b6c2d1e
Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-77-output-1.png differ
diff --git a/docs/eda/images/variable.png b/docs/eda/images/variable.png
new file mode 100644
index 000000000..3cd730a94
Binary files /dev/null and b/docs/eda/images/variable.png differ
diff --git a/feature_engineering/feature_engineering.html b/docs/feature_engineering/feature_engineering.html
similarity index 99%
rename from feature_engineering/feature_engineering.html
rename to docs/feature_engineering/feature_engineering.html
index ddd15067c..0dce32e25 100644
--- a/feature_engineering/feature_engineering.html
+++ b/docs/feature_engineering/feature_engineering.html
@@ -752,7 +752,7 @@
print(f"MSE of model with (hp^2) feature: {np.mean((Y-hp2_model_predictions)**2)}")
-
MSE of model with (hp^2) feature: 18.984768907617223
+
MSE of model with (hp^2) feature: 18.984768907617216
diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png
new file mode 100644
index 000000000..f83966673
Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png differ
diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png
new file mode 100644
index 000000000..ceecd30f2
Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png differ
diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png
new file mode 100644
index 000000000..802e311e2
Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png differ
diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png
new file mode 100644
index 000000000..f57138e12
Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png differ
diff --git a/docs/feature_engineering/images/bias.png b/docs/feature_engineering/images/bias.png
new file mode 100644
index 000000000..e6455ca22
Binary files /dev/null and b/docs/feature_engineering/images/bias.png differ
diff --git a/docs/feature_engineering/images/bvt.png b/docs/feature_engineering/images/bvt.png
new file mode 100644
index 000000000..7baffea82
Binary files /dev/null and b/docs/feature_engineering/images/bvt.png differ
diff --git a/docs/feature_engineering/images/complex.png b/docs/feature_engineering/images/complex.png
new file mode 100644
index 000000000..61769f1a3
Binary files /dev/null and b/docs/feature_engineering/images/complex.png differ
diff --git a/docs/feature_engineering/images/complexity_grad_descent.png b/docs/feature_engineering/images/complexity_grad_descent.png
new file mode 100644
index 000000000..8a48dbbe4
Binary files /dev/null and b/docs/feature_engineering/images/complexity_grad_descent.png differ
diff --git a/docs/feature_engineering/images/complexity_normal_solution.png b/docs/feature_engineering/images/complexity_normal_solution.png
new file mode 100644
index 000000000..c41ad6a7a
Binary files /dev/null and b/docs/feature_engineering/images/complexity_normal_solution.png differ
diff --git a/docs/feature_engineering/images/degree_comparison.png b/docs/feature_engineering/images/degree_comparison.png
new file mode 100644
index 000000000..9bb1992e7
Binary files /dev/null and b/docs/feature_engineering/images/degree_comparison.png differ
diff --git a/docs/feature_engineering/images/degree_comparison2.png b/docs/feature_engineering/images/degree_comparison2.png
new file mode 100644
index 000000000..95ee200a0
Binary files /dev/null and b/docs/feature_engineering/images/degree_comparison2.png differ
diff --git a/docs/feature_engineering/images/gd.png b/docs/feature_engineering/images/gd.png
new file mode 100644
index 000000000..6ba0c3376
Binary files /dev/null and b/docs/feature_engineering/images/gd.png differ
diff --git a/docs/feature_engineering/images/ohe.png b/docs/feature_engineering/images/ohe.png
new file mode 100644
index 000000000..c5f26296c
Binary files /dev/null and b/docs/feature_engineering/images/ohe.png differ
diff --git a/docs/feature_engineering/images/ohemodel.png b/docs/feature_engineering/images/ohemodel.png
new file mode 100644
index 000000000..06dddaea7
Binary files /dev/null and b/docs/feature_engineering/images/ohemodel.png differ
diff --git a/docs/feature_engineering/images/perfect_poly_fits.png b/docs/feature_engineering/images/perfect_poly_fits.png
new file mode 100644
index 000000000..86943ecfc
Binary files /dev/null and b/docs/feature_engineering/images/perfect_poly_fits.png differ
diff --git a/docs/feature_engineering/images/phi.png b/docs/feature_engineering/images/phi.png
new file mode 100644
index 000000000..4c0b04e91
Binary files /dev/null and b/docs/feature_engineering/images/phi.png differ
diff --git a/docs/feature_engineering/images/pytorchsgd.png b/docs/feature_engineering/images/pytorchsgd.png
new file mode 100644
index 000000000..85b07dbcd
Binary files /dev/null and b/docs/feature_engineering/images/pytorchsgd.png differ
diff --git a/docs/feature_engineering/images/remove.png b/docs/feature_engineering/images/remove.png
new file mode 100644
index 000000000..bd09ddcf1
Binary files /dev/null and b/docs/feature_engineering/images/remove.png differ
diff --git a/docs/feature_engineering/images/resamples.png b/docs/feature_engineering/images/resamples.png
new file mode 100644
index 000000000..28f904ab1
Binary files /dev/null and b/docs/feature_engineering/images/resamples.png differ
diff --git a/docs/feature_engineering/images/sgd.png b/docs/feature_engineering/images/sgd.png
new file mode 100644
index 000000000..ee579a100
Binary files /dev/null and b/docs/feature_engineering/images/sgd.png differ
diff --git a/docs/feature_engineering/images/train_error.png b/docs/feature_engineering/images/train_error.png
new file mode 100644
index 000000000..a2993b42b
Binary files /dev/null and b/docs/feature_engineering/images/train_error.png differ
diff --git a/gradient_descent/gradient_descent.html b/docs/gradient_descent/gradient_descent.html
similarity index 87%
rename from gradient_descent/gradient_descent.html
rename to docs/gradient_descent/gradient_descent.html
index 1589ea599..6c0d68abc 100644
--- a/gradient_descent/gradient_descent.html
+++ b/docs/gradient_descent/gradient_descent.html
@@ -106,7 +106,7 @@
require.undef("plotly");
requirejs.config({
paths: {
- 'plotly': ['https://cdn.plot.ly/plotly-2.25.2.min']
+ 'plotly': ['https://cdn.plot.ly/plotly-2.12.1.min']
}
});
require(['plotly'], function(Plotly) {
@@ -591,7 +591,7 @@
my_model.fit(X, Y)
-
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
+
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
Notice that we use double brackets to extract this column. Why double brackets instead of just single brackets? The .fit method, by default, expects to receive 2-dimensional data – some kind of data that includes both rows and columns. Writing penguins["flipper_length_mm"] would return a 1D Series, causing sklearn to error. We avoid this by writing penguins[["flipper_length_mm"]] to produce a 2D DataFrame.
@@ -642,7 +642,7 @@
print(f"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_two_features)**2))}")
-
The RMSE of the model is 0.9881331104079044
+
The RMSE of the model is 0.9881331104079045
We can also see that we obtain the same predictions using sklearn as we did when applying the ordinary least squares formula before!
Perform data cleaning and text manipulation in SQL
+
Join data across tables
+
+
+
+
+
In this lecture, we’ll continue our work from last time to introduce some advanced SQL syntax.
+
First, let’s load in the basic_examples.db database.
+
+
+Code
+
# Load the SQL Alchemy Python library and DuckDB
+import sqlalchemy
+import duckdb
+
+
+
+
# Load %%sql cell magic
+%load_ext sql
+
+
+
# Connect to the database
+%sql duckdb:///data/basic_examples.db --alias basic
+
+
+
21.1 Aggregating with GROUP BY
+
At this point, we’ve seen that SQL offers much of the same functionality that was given to us by pandas. We can extract data from a table, filter it, and reorder it to suit our needs.
+
In pandas, much of our analysis work relied heavily on being able to use .groupby() to aggregate across the rows of our dataset. SQL’s answer to this task is the (very conveniently named) GROUP BY clause. While the outputs of GROUP BY are similar to those of .groupby() —— in both cases, we obtain an output table where some column has been used for grouping —— the syntax and logic used to group data in SQL are fairly different to the pandas implementation.
+
To illustrate GROUP BY, we will consider the Dish table from our database.
+
+
%%sql
+SELECT *
+FROM Dish;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
name
+
type
+
cost
+
+
+
+
+
+
+
+
Notice that there are multiple dishes of the same type. What if we wanted to find the total costs of dishes of a certain type? To accomplish this, we would write the following code.
+
+
%%sql
+SELECT type, SUM(cost)
+FROM Dish
+GROUP BY type;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
type
+
sum("cost")
+
+
+
+
+
+
+
+
What is going on here? The statement GROUP BY type tells SQL to group the data based on the value contained in the type column (whether a record is an appetizer, entree, or dessert). SUM(cost) sums up the costs of dishes in each type and displays the result in the output table.
+
You may be wondering: why does SUM(cost) come before the command to GROUP BY type? Don’t we need to form groups before we can count the number of entries in each? Remember that SQL is a declarative programming language —— a SQL programmer simply states what end result they would like to see, and leaves the task of figuring out how to obtain this result to SQL itself. This means that SQL queries sometimes don’t follow what a reader sees as a “logical” sequence of thought. Instead, SQL requires that we follow its set order of operations when constructing queries. So long as we follow this order, SQL will handle the underlying logic.
+
In practical terms: our goal with this query was to output the total costs of each type. To communicate this to SQL, we say that we want to SELECT the SUMmed cost values for each type group.
+
There are many aggregation functions that can be used to aggregate the data contained in each group. Some common examples are:
+
+
COUNT: count the number of rows associated with each group
+
MIN: find the minimum value of each group
+
MAX: find the maximum value of each group
+
SUM: sum across all records in each group
+
AVG: find the average value of each group
+
+
We can easily compute multiple aggregations all at once (a task that was very tricky in pandas).
To count the number of rows associated with each group, we use the COUNT keyword. Calling COUNT(*) will compute the total number of rows in each group, including rows with null values. Its pandas equivalent is .groupby().size().
+
Recall the Dragon table from the previous lecture:
+
+
%%sql
+SELECT * FROM Dragon;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
name
+
year
+
cute
+
+
+
+
+
+
+
+
Notice that COUNT(*) and COUNT(cute) result in different outputs.
+
+
%%sql
+SELECT year, COUNT(*)
+FROM Dragon
+GROUP BY year;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
year
+
count_star()
+
+
+
+
+
+
+
+
+
%%sql
+SELECT year, COUNT(cute)
+FROM Dragon
+GROUP BY year;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
year
+
count(cute)
+
+
+
+
+
+
+
+
With this definition of GROUP BY in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.
+
SELECT <column expression list>
+FROM <table>
+[WHERE <predicate>]
+[GROUP BY <column list>]
+[ORDER BY <column list>]
+[LIMIT <number of rows>]
+[OFFSET <number of rows>];
+
Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).
+
+
+
21.2 Filtering Groups
+
Now, what if we only want groups that meet a certain condition? HAVING filters groups by applying some condition across all rows in each group. We interpret it as a way to keep only the groups HAVING some condition. Note the difference between WHERE and HAVING: we use WHERE to filter rows, whereas we use HAVING to filter groups. WHERE precedes HAVING in terms of how SQL executes a query.
+
Let’s take a look at the Dish table to see how we can use HAVING. Say we want to group dishes with a cost greater than 4 by type and only keep groups where the max cost is less than 10.
+
+
%%sql
+SELECT type, COUNT(*)
+FROM Dish
+WHERE cost >4
+GROUP BY type
+HAVING MAX(cost) <10;
+
+
* duckdb:///data/basic_examples.db
+Done.
+
+
+
+
+
+
type
+
count_star()
+
+
+
+
+
+
+
+
Here, we first use WHERE to filter for rows with a cost greater than 4. We then group our values by type before applying the HAVING operator. With HAVING, we can filter our groups based on if the max cost is less than 10.
+
+
+
21.3 Summary: SQL
+
With this definition of GROUP BY and HAVING in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.
+
SELECT <column expression list>
+FROM <table>
+[WHERE <predicate>]
+[GROUP BY <column list>]
+[ORDER BY <column list>]
+[LIMIT <number of rows>]
+[OFFSET <number of rows>];
+
Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).
+
+
+
21.4 EDA in SQL
+
In the last lecture, we mostly worked under the assumption that our data had already been cleaned. However, as we saw in our first pass through the data science lifecycle, we’re very unlikely to be given data that is free of formatting issues. With this in mind, we’ll want to learn how to clean and transform data in SQL.
+
Our typical workflow when working with “big data” is:
+
+
Use SQL to query data from a database
+
Use Python (with pandas) to analyze this data in detail
+
+
We can, however, still perform simple data cleaning and re-structuring using SQL directly. To do so, we’ll use the Title table from the imdb_duck database, which contains information about movies and actors.
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb_engine"
+[SQL: imdb_engine]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
Since we’ll be working with the Title table, let’s take a quick look at what it contains.
+
+
%%sql imdb
+
+SELECT *
+FROM Title
+WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
+LIMIT 10;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb"
+[SQL: imdb
+
+SELECT *
+FROM Title
+WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
21.4.1 Matching Text using LIKE
+
One common task we encountered in our first look at EDA was needing to match string data. For example, we might want to remove entries beginning with the same prefix as part of the data cleaning process.
+
In SQL, we use the LIKE operator to (you guessed it) look for strings that are like a given string pattern.
+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+ ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace']
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
What if we wanted to find all Star Wars movies? % is the wildcard operator, it means “look for any character, any number of times”. This makes it helpful for identifying strings that are similar to our desired pattern, even when we don’t know the full text of what we aim to extract.
+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE '%Star Wars%'
+LIMIT 10;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+ ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE '%Star Wars%'
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for this. Note that here, we have to use the SIMILAR TO operator rather than LIKE.
+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle SIMILAR TO '.*Star Wars.*'
+LIMIT 10;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+ ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle SIMILAR TO '.*Star Wars*.'
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+
21.4.2 CASTing Data Types
+
A common data cleaning task is converting data to the correct variable type. The CAST keyword is used to generate a new output column. Each entry in this output column is the result of converting the data in an existing column to a new data type. For example, we may wish to convert numeric data stored as a string to an integer.
+
+
%%sql
+SELECT primaryTitle, CAST(runtimeMinutes AS INT)
+FROM Title;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title;
+ ^
+[SQL: SELECT primaryTitle, CAST(runtimeMinutes AS INT)
+FROM Title;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
We use CAST when SELECTing columns for our output table. In the example above, we want to SELECT the columns of integer year and runtime data that is created by the CAST.
+
SQL will automatically name a new column according to the command used to SELECT it, which can lead to unwieldy column names. We can rename the CASTed column using the AS keyword.
+
+
%%sql
+SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
+FROM Title
+LIMIT 5;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+ ^
+[SQL: SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
+FROM Title
+LIMIT 5;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+
21.4.3 Using Conditional Statements with CASE
+
When working with pandas, we often ran into situations where we wanted to generate new columns using some form of conditional statement. For example, say we wanted to describe a film title as “old,” “mid-aged,” or “new,” depending on the year of its release.
+
In SQL, conditional operations are performed using a CASE clause. Conceptually, CASE behaves much like the CAST operation: it creates a new column that we can then SELECT to appear in the output. The syntax for a CASE clause is as follows:
+
CASE WHEN <condition> THEN <value>
+ WHEN <other condition> THEN <other value>
+ ...
+ ELSE <yet another value>
+ END
+
Scanning through the skeleton code above, you can see that the logic is similar to that of an if statement in Python. The conditional statement is first opened by calling CASE. Each new condition is specified by WHEN, with THEN indicating what value should be filled if the condition is met. ELSE specifies the value that should be filled if no other conditions are met. Lastly, END indicates the end of the conditional statement; once END has been called, SQL will continue evaluating the query as usual.
+
Let’s see this in action. In the example below, we give the new column created by the CASE statement the name movie_age.
+
+
%%sql
+/* If a movie was filmed before 1950, it is "old"
+Otherwise, if a movie was filmed before 2000, it is "mid-aged"
+Else, a movie is "new" */
+
+SELECT titleType, startYear,
+CASE WHEN startYear < 1950 THEN 'old'
+    WHEN startYear < 2000 THEN 'mid-aged'
+ ELSE 'new'
+ END AS movie_age
+FROM Title;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 10: FROM Title;
+ ^
+[SQL: /* If a movie was filmed before 1950, it is "old"
+Otherwise, if a movie was filmed before 2000, it is "mid-aged"
+Else, a movie is "new" */
+
+SELECT titleType, startYear,
+CASE WHEN startYear < 1950 THEN 'old'
+ WHEN startYear < 2000 THEN 'mid-aged'
+ ELSE 'new'
+ END AS movie_age
+FROM Title;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+
+
21.5 JOINing Tables
+
At this point, we’re well-versed in using SQL as a tool to clean, manipulate, and transform data in a table. Notice that this sentence referred to one table, specifically. What happens if the data we need is distributed across multiple tables? This is an important consideration when using SQL —— recall that we first introduced SQL as a language to query from databases. Databases often store data in a multidimensional structure. In other words, information is stored across several tables, with each table containing a small subset of all the data housed by the database.
+
A common way of organizing a database is by using a star schema. A star schema is composed of two types of tables. A fact table is the central table of the database —— it contains the information needed to link entries across several dimension tables, which contain more detailed information about the data.
+
Say we were working with a database about boba offerings in Berkeley. The dimension tables of the database might contain information about tea varieties and boba toppings. The fact table would be used to link this information across the various dimension tables.
+
+
+
+
If we explicitly mark the relationships between tables, we start to see the star-like structure of the star schema.
+
+
+
+
To join data across multiple tables, we’ll use the (creatively named) JOIN keyword. We’ll make things easier for now by first considering the simpler cats dataset, which consists of the tables s and t.
+
+
+
+
To perform a join, we amend the FROM clause. You can think of this as saying, “SELECT my data FROM tables that have been JOINed together.”
+
Remember: SQL does not consider newlines or whitespace when interpreting queries. The indentation given in the example below is to help improve readability. If you wish, you can write code that does not follow this formatting.
We also need to specify what column from each table should be used to determine matching entries. By defining these keys, we provide SQL with the information it needs to pair rows of data together.
+
The most commonly used type of SQL JOIN is the inner join. It turns out you’re already familiar with what an inner join does, and how it works – this is the type of join we’ve been using in pandas all along! In an inner join, we combine every row in our first table with its matching entry in the second table. If a row from either table does not have a match in the other table, it is omitted from the output.
+
+
+
+
In a cross join, all possible combinations of rows appear in the output table, regardless of whether or not rows share a matching key. Because all rows are joined, even if there is no matching key, it is not necessary to specify what keys to consider in an ON statement. A cross join is also known as a cartesian product.
+
+
+
+
Conceptually, we can interpret an inner join as a cross join, followed by removing all rows that do not share a matching key. Notice that the output of the inner join above contains all rows of the cross join example that contain a single color across the entire row.
+
In a left outer join, all rows in the left table are kept in the output table. If a row in the right table shares a match with the left table, this row will be kept; otherwise, the rows in the right table are omitted from the output. We can fill in any missing values with NULL.
+
+
+
+
A right outer join keeps all rows in the right table. Rows in the left table are only kept if they share a match in the right table. Again, we can fill in any missing values with NULL.
+
+
+
+
In a full outer join, all rows that have a match between the two tables are joined together. If a row has no match in the second table, then the values of the columns for that second table are filled with NULL. In other words, a full outer join performs an inner join while still keeping rows that have no match in the other table. This is best understood visually:
+
+
+
+
We have kept the same output achieved using an inner join, with the addition of partially null rows for entries in s and t that had no match in the second table.
+
+
21.5.1 Aliasing in JOINs
+
When joining tables, we often create aliases for table names (similarly to what we did with column names in the last lecture). We do this as it is typically easier to refer to aliases, especially when we are working with long table names. We can even reference columns using aliased table names!
+
Let’s say we want to determine the average rating of various movies. We’ll need to JOIN the Title and Rating tables and can create aliases for both tables.
+
+
%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title AS T INNER JOIN Rating AS R
+ON T.tconst = R.tconst;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title AS T INNER JOIN Rating AS R
+ ^
+[SQL: SELECT primaryTitle, averageRating
+FROM Title AS T INNER JOIN Rating AS R
+ON T.tconst = R.tconst;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
Note that the AS is actually optional! We can create aliases for our tables even without it, but we usually include it for clarity.
+
+
%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title T INNER JOIN Rating R
+ON T.tconst = R.tconst;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title T INNER JOIN Rating R
+ ^
+[SQL: SELECT primaryTitle, averageRating
+FROM Title T INNER JOIN Rating R
+ON T.tconst = R.tconst;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+
21.5.2 Common Table Expressions
+
For more sophisticated data problems, the queries can become very complex. Common table expressions (CTEs) allow us to break down these complex queries into more manageable parts. To do so, we create temporary tables corresponding to different aspects of the problem and then reference them in the final query:
+
WITH
+table_name1 AS (
+ SELECT ...
+),
+table_name2 AS (
+ SELECT ...
+)
+SELECT ...
+FROM
+table_name1,
+table_name2, ...
+
Let’s say we want to identify the top 10 action movies that are highly rated (with an average rating greater than 7) and popular (having more than 5000 votes), along with the primary actors who are the most popular. We can use CTEs to break this query down into separate problems. Initially, we can filter to find good action movies and prolific actors separately. This way, in our final join, we only need to change the order.
+
+
%%sql
+WITH
+good_action_movies AS (
+ SELECT *
+ FROM Title T JOIN Rating R ON T.tconst = R.tconst
+  WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
+),
+prolific_actors AS (
+  SELECT N.nconst, primaryName, COUNT(*) as numRoles
+  FROM Name N JOIN Principal P ON N.nconst = P.nconst
+  WHERE category = 'actor'
+ GROUP BY N.nconst, primaryName
+)
+SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
+FROM good_action_movies m, prolific_actors a, principal p
+WHERE p.tconst = m.tconst AND p.nconst = a.nconst
+ORDER BY rating DESC, numRoles DESC
+LIMIT 10;
+
+
* duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 4: F...
+ ^
+[SQL: WITH
+good_action_movies AS (
+ SELECT *
+ FROM Title T JOIN Rating R ON T.tconst = R.tconst
+ WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
+),
+prolific_actors AS (
+ SELECT N.nconst, primaryName, COUNT(*) as numRoles
+ FROM Name N JOIN Principal P ON N.nconst = P.nconst
+ WHERE category = 'actor'
+ GROUP BY N.nconst, primaryName
+)
+SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
+FROM good_action_movies m, prolific_actors a, principal p
+WHERE p.tconst = m.tconst AND p.nconst = a.nconst
+ORDER BY rating DESC, numRoles DESC
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
---
+title: SQL II
+execute:
+ echo: true
+format:
+ html:
+ code-fold: false
+ code-tools: true
+ toc: true
+ toc-title: SQL II
+ page-layout: full
+ theme:
+ - cosmo
+ - cerulean
+ callout-icon: false
+jupyter:
+ jupytext:
+ text_representation:
+ extension: .qmd
+ format_name: quarto
+ format_version: '1.0'
+ jupytext_version: 1.16.1
+ kernelspec:
+ display_name: Python 3 (ipykernel)
+ language: python
+ name: python3
+---
+
+::: {.callout-note collapse="false"}
+## Learning Outcomes
+* Perform aggregations using `GROUP BY`
+* Introduce the ability to filter groups
+* Perform data cleaning and text manipulation in SQL
+* Join data across tables
+:::
+
+In this lecture, we'll continue our work from last time to introduce some advanced SQL syntax.
+
+First, let's load in the `basic_examples.db` database.
+
+```{python}
+#| code-fold: true
+# Load the SQL Alchemy Python library and DuckDB
+import sqlalchemy
+import duckdb
+```
+
+```{python}
+#| vscode: {languageId: python}
+# Load %%sql cell magic
+%load_ext sql
+```
+
+```{python}
+#| vscode: {languageId: python}
+# Connect to the database
+%sql duckdb:///data/basic_examples.db --alias basic
+```
+
+## Aggregating with `GROUP BY`
+
+At this point, we've seen that SQL offers much of the same functionality that was given to us by `pandas`. We can extract data from a table, filter it, and reorder it to suit our needs.
+
+In `pandas`, much of our analysis work relied heavily on being able to use `.groupby()` to aggregate across the rows of our dataset. SQL's answer to this task is the (very conveniently named) `GROUP BY` clause. While the outputs of `GROUP BY` are similar to those of `.groupby()` —— in both cases, we obtain an output table where some column has been used for grouping —— the syntax and logic used to group data in SQL are fairly different to the `pandas` implementation.
+
+To illustrate `GROUP BY`, we will consider the `Dish` table from our database.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT *
+FROM Dish;
+```
+
+Notice that there are multiple dishes of the same `type`. What if we wanted to find the total costs of dishes of a certain `type`? To accomplish this, we would write the following code.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT type, SUM(cost)
+FROM Dish
+GROUP BY type;
+```
+
+What is going on here? The statement `GROUP BY type` tells SQL to group the data based on the value contained in the `type` column (whether a record is an appetizer, entree, or dessert). `SUM(cost)` sums up the costs of dishes in each `type` and displays the result in the output table.
+
+You may be wondering: why does `SUM(cost)` come before the command to `GROUP BY type`? Don't we need to form groups before we can count the number of entries in each? Remember that SQL is a *declarative* programming language —— a SQL programmer simply states what end result they would like to see, and leaves the task of figuring out *how* to obtain this result to SQL itself. This means that SQL queries sometimes don't follow what a reader sees as a "logical" sequence of thought. Instead, SQL requires that we follow its set order of operations when constructing queries. So long as we follow this order, SQL will handle the underlying logic.
+
+In practical terms: our goal with this query was to output the total `cost`s of each `type`. To communicate this to SQL, we say that we want to `SELECT` the `SUM`med `cost` values for each `type` group.
+
+There are many aggregation functions that can be used to aggregate the data contained in each group. Some common examples are:
+
+* `COUNT`: count the number of rows associated with each group
+* `MIN`: find the minimum value of each group
+* `MAX`: find the maximum value of each group
+* `SUM`: sum across all records in each group
+* `AVG`: find the average value of each group
+
+We can easily compute multiple aggregations all at once (a task that was very tricky in `pandas`).
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT type, SUM(cost), MIN(cost), MAX(name)
+FROM Dish
+GROUP BY type;
+```
+
+To count the number of rows associated with each group, we use the `COUNT` keyword. Calling `COUNT(*)` will compute the total number of rows in each group, including rows with null values. Its `pandas` equivalent is `.groupby().size()`.
+
+Recall the `Dragon` table from the previous lecture:
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT * FROM Dragon;
+```
+
+Notice that `COUNT(*)` and `COUNT(cute)` result in different outputs.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT year, COUNT(*)
+FROM Dragon
+GROUP BY year;
+```
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT year, COUNT(cute)
+FROM Dragon
+GROUP BY year;
+```
+
+With this definition of `GROUP BY` in hand, let's update our SQL order of operations. Remember: *every* SQL query must list clauses in this order.
+
+ SELECT <column expression list>
+ FROM <table>
+ [WHERE <predicate>]
+ [GROUP BY <column list>]
+ [ORDER BY <column list>]
+ [LIMIT <number of rows>]
+ [OFFSET <number of rows>];
+
+Note that we can use the `AS` keyword to rename columns during the selection process and that column expressions may include aggregation functions (`MAX`, `MIN`, etc.).
+
+## Filtering Groups
+
+Now, what if we only want groups that meet a certain condition? `HAVING` filters groups by applying some condition across all rows in each group. We interpret it as a way to keep only the groups `HAVING` some condition. Note the difference between `WHERE` and `HAVING`: we use `WHERE` to filter rows, whereas we use `HAVING` to filter *groups*. `WHERE` precedes `HAVING` in terms of how SQL executes a query.
+
+Let's take a look at the `Dish` table to see how we can use `HAVING`. Say we want to group dishes with a cost greater than 4 by `type` and only keep groups where the max cost is less than 10.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT type, COUNT(*)
+FROM Dish
+WHERE cost > 4
+GROUP BY type
+HAVING MAX(cost) < 10;
+```
+
+Here, we first use `WHERE` to filter for rows with a cost greater than 4. We then group our values by `type` before applying the `HAVING` operator. With `HAVING`, we can filter our groups based on if the max cost is less than 10.
+
+## Summary: SQL
+With this definition of `GROUP BY` and `HAVING` in hand, let's update our SQL order of operations. Remember: *every* SQL query must list clauses in this order.
+
+ SELECT <column expression list>
+ FROM <table>
+ [WHERE <predicate>]
+ [GROUP BY <column list>]
+ [ORDER BY <column list>]
+ [LIMIT <number of rows>]
+ [OFFSET <number of rows>];
+
+Note that we can use the `AS` keyword to rename columns during the selection process and that column expressions may include aggregation functions (`MAX`, `MIN`, etc.).
+
+## EDA in SQL
+In the last lecture, we mostly worked under the assumption that our data had already been cleaned. However, as we saw in our first pass through the data science lifecycle, we're very unlikely to be given data that is free of formatting issues. With this in mind, we'll want to learn how to clean and transform data in SQL.
+
+Our typical workflow when working with "big data" is:
+
+1. Use SQL to query data from a database
+2. Use Python (with `pandas`) to analyze this data in detail
+
+We can, however, still perform simple data cleaning and re-structuring using SQL directly. To do so, we'll use the `Title` table from the `imdb_duck` database, which contains information about movies and actors.
+
+Let's load in the `imdb_duck` database.
+
+```{python}
+#| vscode: {languageId: python}
+import os
+if os.path.exists("/home/jovyan/shared/sql/imdb_duck.db"):
+    imdbpath = "duckdb:////home/jovyan/shared/sql/imdb_duck.db"
+elif os.path.exists("data/imdb_duck.db"):
+    imdbpath = "duckdb:///data/imdb_duck.db"
+else:
+    import gdown
+    url = 'https://drive.google.com/uc?id=10tKOHGLt9QoOgq5Ii-FhxpB9lDSQgl1O'
+    output_path = 'data/imdb_duck.db'
+    gdown.download(url, output_path, quiet=False)
+    imdbpath = "duckdb:///data/imdb_duck.db"
+print(imdbpath)
+```
+
+```{python}
+#| vscode: {languageId: python}
+from sqlalchemy import create_engine
+imdb_engine = create_engine(imdbpath, connect_args={'read_only': True})
+%sql imdb_engine --alias imdb
+```
+
+Since we'll be working with the `Title` table, let's take a quick look at what it contains.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql imdb
+
+SELECT *
+FROM Title
+WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
+LIMIT 10;
+```
+
+### Matching Text using `LIKE`
+
+One common task we encountered in our first look at EDA was needing to match string data. For example, we might want to remove entries beginning with the same prefix as part of the data cleaning process.
+
+In SQL, we use the `LIKE` operator to (you guessed it) look for strings that are *like* a given string pattern.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'
+```
+
+What if we wanted to find *all* Star Wars movies? `%` is the wildcard operator, it means "look for any character, any number of times". This makes it helpful for identifying strings that are similar to our desired pattern, even when we don't know the full text of what we aim to extract.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE '%Star Wars%'
+LIMIT 10;
+```
+
+Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for this. Note that here, we have to use the `SIMILAR TO` operator rather than `LIKE`.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle SIMILAR TO '.*Star Wars.*'
+LIMIT 10;
+```
+
+### `CAST`ing Data Types
+
+A common data cleaning task is converting data to the correct variable type. The `CAST` keyword is used to generate a new output column. Each entry in this output column is the result of converting the data in an existing column to a new data type. For example, we may wish to convert numeric data stored as a string to an integer.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT primaryTitle, CAST(runtimeMinutes AS INT)
+FROM Title;
+```
+
+We use `CAST` when `SELECT`ing columns for our output table. In the example above, we want to `SELECT` the columns of integer year and runtime data that is created by the `CAST`.
+
+SQL will automatically name a new column according to the command used to `SELECT` it, which can lead to unwieldy column names. We can rename the `CAST`ed column using the `AS` keyword.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
+FROM Title
+LIMIT 5;
+```
+
+### Using Conditional Statements with `CASE`
+
+When working with `pandas`, we often ran into situations where we wanted to generate new columns using some form of conditional statement. For example, say we wanted to describe a film title as "old," "mid-aged," or "new," depending on the year of its release.
+
+In SQL, conditional operations are performed using a `CASE` clause. Conceptually, `CASE` behaves much like the `CAST` operation: it creates a new column that we can then `SELECT` to appear in the output. The syntax for a `CASE` clause is as follows:
+
+ CASE WHEN <condition> THEN <value>
+ WHEN <other condition> THEN <other value>
+ ...
+ ELSE <yet another value>
+ END
+
+Scanning through the skeleton code above, you can see that the logic is similar to that of an `if` statement in Python. The conditional statement is first opened by calling `CASE`. Each new condition is specified by `WHEN`, with `THEN` indicating what value should be filled if the condition is met. `ELSE` specifies the value that should be filled if no other conditions are met. Lastly, `END` indicates the end of the conditional statement; once `END` has been called, SQL will continue evaluating the query as usual.
+
+Let's see this in action. In the example below, we give the new column created by the `CASE` statement the name `movie_age`.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+/* If a movie was filmed before 1950, it is "old"
+Otherwise, if a movie was filmed before 2000, it is "mid-aged"
+Else, a movie is "new" */
+
+SELECT titleType, startYear,
+CASE WHEN startYear < 1950 THEN 'old'
+    WHEN startYear < 2000 THEN 'mid-aged'
+ ELSE 'new'
+ END AS movie_age
+FROM Title;
+```
+
+## `JOIN`ing Tables
+
+At this point, we're well-versed in using SQL as a tool to clean, manipulate, and transform data in a table. Notice that this sentence referred to one *table*, specifically. What happens if the data we need is distributed across multiple tables? This is an important consideration when using SQL —— recall that we first introduced SQL as a language to query from databases. Databases often store data in a multidimensional structure. In other words, information is stored across several tables, with each table containing a small subset of all the data housed by the database.
+
+A common way of organizing a database is by using a **star schema**. A star schema is composed of two types of tables. A **fact table** is the central table of the database —— it contains the information needed to link entries across several **dimension tables**, which contain more detailed information about the data.
+
+Say we were working with a database about boba offerings in Berkeley. The dimension tables of the database might contain information about tea varieties and boba toppings. The fact table would be used to link this information across the various dimension tables.
+
+<div style="text-align: center;">
+<img src="images/multidimensional.png" alt='multidimensional' width='850'>
+</div>
+
+If we explicitly mark the relationships between tables, we start to see the star-like structure of the star schema.
+
+<div style="text-align: center;">
+<img src="images/star.png" alt='star' width='650'>
+</div>
+
+To join data across multiple tables, we'll use the (creatively named) `JOIN` keyword. We'll make things easier for now by first considering the simpler `cats` dataset, which consists of the tables `s` and `t`.
+
+<div style="text-align: center;">
+<img src="images/cats.png" alt='cats' width='500'>
+</div>
+
+To perform a join, we amend the `FROM` clause. You can think of this as saying, "`SELECT` my data `FROM` tables that have been `JOIN`ed together."
+
+Remember: SQL does not consider newlines or whitespace when interpreting queries. The indentation given in the example below is to help improve readability. If you wish, you can write code that does not follow this formatting.
+
+ SELECT <column list>
+ FROM table_1
+ JOIN table_2
+ ON key_1 = key_2;
+
+We also need to specify what column from each table should be used to determine matching entries. By defining these keys, we provide SQL with the information it needs to pair rows of data together.
+
+
+The most commonly used type of SQL `JOIN` is the **inner join**. It turns out you're already familiar with what an inner join does, and how it works – this is the type of join we've been using in `pandas` all along! In an inner join, we combine every row in our first table with its matching entry in the second table. If a row from either table does not have a match in the other table, it is omitted from the output.
+
+<div style="text-align: center;">
+<img src="images/inner.png" alt='inner' width='800'>
+</div>
+
+In a **cross join**, *all* possible combinations of rows appear in the output table, regardless of whether or not rows share a matching key. Because all rows are joined, even if there is no matching key, it is not necessary to specify what keys to consider in an `ON` statement. A cross join is also known as a cartesian product.
+
+<div style="text-align: center;">
+<img src="images/cross.png" alt='cross' width='800'>
+</div>
+
+Conceptually, we can interpret an inner join as a cross join, followed by removing all rows that do not share a matching key. Notice that the output of the inner join above contains all rows of the cross join example that contain a single color across the entire row.
+
+In a **left outer join**, *all* rows in the left table are kept in the output table. If a row in the right table shares a match with the left table, this row will be kept; otherwise, the rows in the right table are omitted from the output. We can fill in any missing values with `NULL`.
+
+<div style="text-align: center;">
+<img src="images/left.png" alt='left' width='800'>
+</div>
+
+A **right outer join** keeps all rows in the right table. Rows in the left table are only kept if they share a match in the right table. Again, we can fill in any missing values with `NULL`.
+
+<div style="text-align: center;">
+<img src="images/right.png" alt='right' width='800'>
+</div>
+
+In a **full outer join**, all rows that have a match between the two tables are joined together. If a row has no match in the second table, then the values of the columns for that second table are filled with `NULL`. In other words, a full outer join performs an inner join *while still keeping* rows that have no match in the other table. This is best understood visually:
+
+<div style="text-align: center;">
+<img src="images/full.png" alt='full' width='800'>
+</div>
+
+We have kept the same output achieved using an inner join, with the addition of partially null rows for entries in `s` and `t` that had no match in the second table.
+
+### Aliasing in `JOIN`s
+
+When joining tables, we often create aliases for table names (similarly to what we did with column names in the last lecture). We do this as it is typically easier to refer to aliases, especially when we are working with long table names. We can even reference columns using aliased table names!
+
+Let's say we want to determine the average rating of various movies. We'll need to `JOIN` the `Title` and `Rating` tables and can create aliases for both tables.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title AS T INNER JOIN Rating AS R
+ON T.tconst = R.tconst;
+```
+
+Note that the `AS` is actually optional! We can create aliases for our tables even without it, but we usually include it for clarity.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title T INNER JOIN Rating R
+ON T.tconst = R.tconst;
+```
+
+### Common Table Expressions
+
+For more sophisticated data problems, the queries can become very complex. Common table expressions (CTEs) allow us to break down these complex queries into more manageable parts. To do so, we create temporary tables corresponding to different aspects of the problem and then reference them in the final query:
+
+ WITH
+ table_name1 AS (
+ SELECT ...
+ ),
+ table_name2 AS (
+ SELECT ...
+ )
+ SELECT ...
+ FROM
+ table_name1,
+ table_name2, ...
+
+Let's say we want to identify the top 10 action movies that are highly rated (with an average rating greater than 7) and popular (having more than 5000 votes), along with the primary actors who are the most popular. We can use CTEs to break this query down into separate problems. Initially, we can filter to find good action movies and prolific actors separately. This way, in our final join, we only need to change the order.
+
+```{python}
+#| vscode: {languageId: python}
+%%sql
+WITH
+good_action_movies AS (
+ SELECT *
+ FROM Title T JOIN Rating R ON T.tconst = R.tconst
+  WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
+),
+prolific_actors AS (
+  SELECT N.nconst, primaryName, COUNT(*) as numRoles
+  FROM Name N JOIN Principal P ON N.nconst = P.nconst
+  WHERE category = 'actor'
+ GROUP BY N.nconst, primaryName
+)
+SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
+FROM good_action_movies m, prolific_actors a, principal p
+WHERE p.tconst = m.tconst AND p.nconst = a.nconst
+ORDER BY rating DESC, numRoles DESC
+LIMIT 10;
+```
+