diff --git a/assets/images/sp24-midterm/df.png b/assets/images/sp24-midterm/df.png
new file mode 100644
index 0000000..82b8175
Binary files /dev/null and b/assets/images/sp24-midterm/df.png differ
diff --git a/assets/images/sp24-midterm/h.png b/assets/images/sp24-midterm/h.png
new file mode 100644
index 0000000..ee4f7fd
Binary files /dev/null and b/assets/images/sp24-midterm/h.png differ
diff --git a/assets/images/sp24-midterm/j.png b/assets/images/sp24-midterm/j.png
new file mode 100644
index 0000000..8fcc58c
Binary files /dev/null and b/assets/images/sp24-midterm/j.png differ
diff --git a/assets/images/sp24-midterm/o.png b/assets/images/sp24-midterm/o.png
new file mode 100644
index 0000000..88b864f
Binary files /dev/null and b/assets/images/sp24-midterm/o.png differ
diff --git a/assets/images/sp24-midterm/q4a.png b/assets/images/sp24-midterm/q4a.png
new file mode 100644
index 0000000..fcac42a
Binary files /dev/null and b/assets/images/sp24-midterm/q4a.png differ
diff --git a/assets/images/sp24-midterm/q4b.png b/assets/images/sp24-midterm/q4b.png
new file mode 100644
index 0000000..ff0a67f
Binary files /dev/null and b/assets/images/sp24-midterm/q4b.png differ
diff --git a/assets/images/sp24-midterm/q5.png b/assets/images/sp24-midterm/q5.png
new file mode 100644
index 0000000..0393f58
Binary files /dev/null and b/assets/images/sp24-midterm/q5.png differ
diff --git a/docs/assets/images/sp24-midterm/df.png b/docs/assets/images/sp24-midterm/df.png
new file mode 100644
index 0000000..82b8175
Binary files /dev/null and b/docs/assets/images/sp24-midterm/df.png differ
diff --git a/docs/assets/images/sp24-midterm/h.png b/docs/assets/images/sp24-midterm/h.png
new file mode 100644
index 0000000..ee4f7fd
Binary files /dev/null and b/docs/assets/images/sp24-midterm/h.png differ
diff --git a/docs/assets/images/sp24-midterm/j.png b/docs/assets/images/sp24-midterm/j.png
new file mode 100644
index 0000000..8fcc58c
Binary files /dev/null and b/docs/assets/images/sp24-midterm/j.png differ
diff --git a/docs/assets/images/sp24-midterm/o.png b/docs/assets/images/sp24-midterm/o.png
new file mode 100644
index 0000000..88b864f
Binary files /dev/null and b/docs/assets/images/sp24-midterm/o.png differ
diff --git a/docs/assets/images/sp24-midterm/q4a.png b/docs/assets/images/sp24-midterm/q4a.png
new file mode 100644
index 0000000..fcac42a
Binary files /dev/null and b/docs/assets/images/sp24-midterm/q4a.png differ
diff --git a/docs/assets/images/sp24-midterm/q4b.png b/docs/assets/images/sp24-midterm/q4b.png
new file mode 100644
index 0000000..ff0a67f
Binary files /dev/null and b/docs/assets/images/sp24-midterm/q4b.png differ
diff --git a/docs/assets/images/sp24-midterm/q5.png b/docs/assets/images/sp24-midterm/q5.png
new file mode 100644
index 0000000..0393f58
Binary files /dev/null and b/docs/assets/images/sp24-midterm/q5.png differ
diff --git a/docs/index.html b/docs/index.html
index b203342..f461a54 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -87,6 +87,18 @@ <h3>
 <tbody>
 <tr>
 <th scope="row">
+Spring 2024
+</th>
+<td>
+Sam Lau
+</td>
+<td>
+<a href='sp24-midterm/index.html'>Midterm 🆕</a> <br>
+<!-- <a href='sp24-final/index.html'>Final 🆕</a> -->
+</td>
+</tr>
+<tr>
+<th scope="row">
 Winter 2024
 </th>
 <td>
@@ -94,7 +106,7 @@ <h3>
 </td>
 <td>
 <a href='wi24-midterm/index.html'>Midterm</a> <br>
-<a href='wi24-final/index.html'>Final 🆕</a>
+<a href='wi24-final/index.html'>Final</a>
 </td>
 </tr>
 <tr>
diff --git a/docs/sp24-midterm/index.html b/docs/sp24-midterm/index.html
new file mode 100644
index 0000000..8c80ad0
--- /dev/null
+++ b/docs/sp24-midterm/index.html
@@ -0,0 +1,833 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="generator" content="pandoc" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+  <title>Spring 2024 Midterm Exam</title>
+  <style>
+    code{white-space: pre-wrap;}
+    span.smallcaps{font-variant: small-caps;}
+    div.columns{display: flex; gap: min(4vw, 1.5em);}
+    div.column{flex: auto; overflow-x: auto;}
+    div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
+    /* The extra [class] is a hack that increases specificity enough to
+       override a similar rule in reveal.js */
+    ul.task-list[class]{list-style: none;}
+    ul.task-list li input[type="checkbox"] {
+      font-size: inherit;
+      width: 0.8em;
+      margin: 0 0.8em 0.2em -1.6em;
+      vertical-align: middle;
+    }
+    /* CSS for syntax highlighting */
+    pre > code.sourceCode { white-space: pre; position: relative; }
+    pre > code.sourceCode > span { line-height: 1.25; }
+    pre > code.sourceCode > span:empty { height: 1.2em; }
+    .sourceCode { overflow: visible; }
+    code.sourceCode > span { color: inherit; text-decoration: inherit; }
+    div.sourceCode { margin: 1em 0; }
+    pre.sourceCode { margin: 0; }
+    @media screen {
+    div.sourceCode { overflow: auto; }
+    }
+    @media print {
+    pre > code.sourceCode { white-space: pre-wrap; }
+    pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+    }
+    pre.numberSource code
+      { counter-reset: source-line 0; }
+    pre.numberSource code > span
+      { position: relative; left: -4em; counter-increment: source-line; }
+    pre.numberSource code > span > a:first-child::before
+      { content: counter(source-line);
+        position: relative; left: -1em; text-align: right; vertical-align: baseline;
+        border: none; display: inline-block;
+        -webkit-touch-callout: none; -webkit-user-select: none;
+        -khtml-user-select: none; -moz-user-select: none;
+        -ms-user-select: none; user-select: none;
+        padding: 0 4px; width: 4em;
+        color: #aaaaaa;
+      }
+    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
+    div.sourceCode
+      {   }
+    @media screen {
+    pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+    }
+    code span.al { color: #ff0000; font-weight: bold; } /* Alert */
+    code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+    code span.at { color: #7d9029; } /* Attribute */
+    code span.bn { color: #40a070; } /* BaseN */
+    code span.bu { color: #008000; } /* BuiltIn */
+    code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+    code span.ch { color: #4070a0; } /* Char */
+    code span.cn { color: #880000; } /* Constant */
+    code span.co { color: #60a0b0; font-style: italic; } /* Comment */
+    code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+    code span.do { color: #ba2121; font-style: italic; } /* Documentation */
+    code span.dt { color: #902000; } /* DataType */
+    code span.dv { color: #40a070; } /* DecVal */
+    code span.er { color: #ff0000; font-weight: bold; } /* Error */
+    code span.ex { } /* Extension */
+    code span.fl { color: #40a070; } /* Float */
+    code span.fu { color: #06287e; } /* Function */
+    code span.im { color: #008000; font-weight: bold; } /* Import */
+    code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+    code span.kw { color: #007020; font-weight: bold; } /* Keyword */
+    code span.op { color: #666666; } /* Operator */
+    code span.ot { color: #007020; } /* Other */
+    code span.pp { color: #bc7a00; } /* Preprocessor */
+    code span.sc { color: #4070a0; } /* SpecialChar */
+    code span.ss { color: #bb6688; } /* SpecialString */
+    code span.st { color: #4070a0; } /* String */
+    code span.va { color: #19177c; } /* Variable */
+    code span.vs { color: #4070a0; } /* VerbatimString */
+    code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+  </style>
+  <link rel="stylesheet" href="../assets/theme.css" />
+  <script defer=""
+  src="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.js"></script>
+  <script>document.addEventListener("DOMContentLoaded", function () { // runs once the DOM is parsed: renders the page's math with KaTeX
+ var mathElements = document.getElementsByClassName("math"); // every element carrying class "math"
+ var macros = []; // handed to each katex.render call below via the macros option
+ for (var i = 0; i < mathElements.length; i++) {
+  var texText = mathElements[i].firstChild; // NOTE(review): presumably a text node holding the TeX source; null for an empty element
+  if (mathElements[i].tagName == "SPAN") { // only <span> elements are rendered; other tags with class "math" are skipped
+   katex.render(texText.data, mathElements[i], { // render into the same element that supplied the source text
+    displayMode: mathElements[i].classList.contains('display'), // true only when the span also has class "display"
+    throwOnError: false, // NOTE(review): per KaTeX docs this should show invalid TeX as an error instead of throwing; confirm for 0.15.1
+    macros: macros,
+    fleqn: false
+   });
+}}});
+  </script>
+  <link rel="stylesheet"
+  href="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.css" />
+  <!--[if lt IE 9]>
+    <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
+  <![endif]-->
+</head>
+<body>
+<header id="title-block-header">
+<h1 class="title">Spring 2024 Midterm Exam</h1>
+</header>
+<p><link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
+<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
+<!-- add after bootstrap.min.css -->
+<link rel="stylesheet" href="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.css"/>
+<!-- add after bootstrap.min.js or bootstrap.bundle.min.js -->
+<script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script></p>
+<!-- for difficulty gauges-->
+<script src="https://cdn.plot.ly/plotly-2.16.1.min.js"></script>
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-B947E6J6H4"></script>
+<script>
+  window.dataLayer = window.dataLayer || []; // reuse an existing dataLayer array, or create an empty one
+  function gtag(){dataLayer.push(arguments);} // queue each call's arguments object on dataLayer
+  gtag('js', new Date()); // first queued entry: 'js' plus the current timestamp
+
+  gtag('config', 'G-B947E6J6H4'); // same ID as in the gtag.js loader URL of the preceding <script async> tag
+</script>
+<p><a href="../index.html">← return to practice.dsc80.com</a></p>
+<hr />
+<p><strong>Instructor(s):</strong> Sam Lau</p>
+<p>This exam was administered in-person. The exam was closed-notes,
+except students were allowed to bring a single two-sided notes sheet. No
+calculators were allowed. Students had <strong>80 minutes</strong> to
+take this exam.</p>
+<hr />
+<h2 id="problem-1">Problem 1</h2>
+<p>Fill in Python code below so that the last line of each part
+evaluates to each desired result using the tables <code>h</code>,
+<code>o</code>, and <code>j</code> as shown on the Reference Sheet.</p>
+<p><br></p>
+<h3 id="problem-1.1">Problem 1.1</h3>
+<p>Find the median duration of outages that happened in the early
+morning (before 8am).</p>
+<div class="sourceCode" id="cb1"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>o.loc[__(a)__,__(b)__].median()</span></code></pre></div>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading1_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse1_1" aria-expanded="true" aria-controls="collapse1_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse1_1" class="accordion-collapse collapse"
+aria-labelledby="heading1_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>(a): <code>o['time'].dt.hour &lt; 8</code></p>
+<p>(b): <code>'duration'</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-1.2">Problem 1.2</h3>
+<p>A Series containing the mean outage duration for outages that
+happened on the weekend and outages that happened on weekdays.</p>
+<p><em>Hint: If <code>s</code> is a Series of timestamps,
+<code>s.dt.dayofweek</code> returns a Series of integers where 0 is
+Monday and 6 is Sunday.</em></p>
+<div class="sourceCode" id="cb2"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>(o.assign(__(a)__)</span>
+<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>.groupby(__(b)__)[__(c)__].mean())</span></code></pre></div>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading1_2">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse1_2" aria-expanded="true" aria-controls="collapse1_2">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse1_2" class="accordion-collapse collapse"
+aria-labelledby="heading1_2" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>(a): <code>is_weekend=o['time'].dt.dayofweek &gt;= 5</code></p>
+<p>(b): <code>'is_weekend'</code>, (c): <code>'duration'</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-1.3">Problem 1.3</h3>
+<p>A DataFrame containing the proportion of 4-digit address numbers for
+each unique street in <code>h</code>.</p>
+<div class="sourceCode" id="cb3"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> foo(x):</span>
+<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>    lengths <span class="op">=</span> __(a)__</span>
+<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> (lengths <span class="op">==</span> <span class="dv">4</span>).mean()</span>
+<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>h.groupby(__(b)__).__(c)__(foo)</span></code></pre></div>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading1_3">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse1_3" aria-expanded="true" aria-controls="collapse1_3">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse1_3" class="accordion-collapse collapse"
+aria-labelledby="heading1_3" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>(a): <code>x.astype(str).str.len()</code></p>
+<p>(b): <code>'street'</code></p>
+<p>(c): <code>agg</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-1.4">Problem 1.4</h3>
+<p>What does the following code compute?</p>
+<div class="sourceCode" id="cb4"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>a <span class="op">=</span> h.merge(j, left_index<span class="op">=</span><span class="va">True</span>, right_on<span class="op">=</span><span class="st">&#39;hid&#39;</span>, how<span class="op">=</span><span class="st">&#39;left&#39;</span>)</span>
+<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>a.loc[a[<span class="st">&#39;oid&#39;</span>].isna(), <span class="st">&#39;hid&#39;</span>].shape[<span class="dv">0</span>]</span></code></pre></div>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> The number of addresses with exactly one outage.</p></li>
+<li><p><input type="radio" disabled="" /> The number of addresses with at least one outage.</p></li>
+<li><p><input type="radio" disabled="" /> The number of addresses with no outages.</p></li>
+<li><p><input type="radio" disabled="" /> The total number of addresses affected by all power outages.</p></li>
+<li><p><input type="radio" disabled="" /> The number of power outages.</p></li>
+<li><p><input type="radio" disabled="" /> The number of power outages that affected exactly one address.</p></li>
+<li><p><input type="radio" disabled="" /> The number of power outages that affected at least one address.</p></li>
+<li><p><input type="radio" disabled="" /> The number of power outages that affected no addresses.</p></li>
+<li><p><input type="radio" disabled="" /> 0</p></li>
+<li><p><input type="radio" disabled="" /> The code will raise an error.</p></li>
+<li><p><input type="radio" disabled="" /> None of the above.</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading1_4">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse1_4" aria-expanded="true" aria-controls="collapse1_4">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse1_4" class="accordion-collapse collapse"
+aria-labelledby="heading1_4" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> The number of addresses with no outages.</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="problem-2">Problem 2</h2>
+<p><br></p>
+<h3 id="problem-2.1">Problem 2.1</h3>
+<p>Consider the following code:</p>
+<div class="sourceCode" id="cb5"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>whoa <span class="op">=</span> (h.merge(j, left_index<span class="op">=</span><span class="va">True</span>, right_on<span class="op">=</span><span class="st">&#39;hid&#39;</span>, how<span class="op">=</span><span class="st">&#39;left&#39;</span>)</span>
+<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>        .merge(o, left_on<span class="op">=</span><span class="st">&#39;oid&#39;</span>, right_index<span class="op">=</span><span class="va">True</span>, how<span class="op">=</span><span class="st">&#39;right&#39;</span>)</span>
+<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>        .reset_index(drop<span class="op">=</span><span class="va">True</span>))</span></code></pre></div>
+<p>Consider the following variables:</p>
+<div class="sourceCode" id="cb6"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>a <span class="op">=</span> j[<span class="st">&#39;hid&#39;</span>] <span class="op">&lt;=</span> <span class="dv">50</span></span>
+<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>b <span class="op">=</span> j[<span class="st">&#39;hid&#39;</span>] <span class="op">&gt;</span> <span class="dv">50</span></span>
+<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>c <span class="op">=</span> j[<span class="st">&#39;oid&#39;</span>] <span class="op">&lt;=</span> <span class="dv">100</span></span>
+<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a>d <span class="op">=</span> j[<span class="st">&#39;oid&#39;</span>] <span class="op">&gt;</span> <span class="dv">100</span></span>
+<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>e <span class="op">=</span> (j[j[<span class="st">&#39;hid&#39;</span>] <span class="op">&lt;=</span> <span class="dv">50</span>]</span>
+<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>     .groupby(<span class="st">&#39;hid&#39;</span>)</span>
+<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>     .<span class="bu">filter</span>(<span class="kw">lambda</span> x: <span class="bu">all</span>(x[<span class="st">&#39;oid&#39;</span>] <span class="op">&gt;</span> <span class="dv">100</span>))</span>
+<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>     [<span class="st">&#39;hid&#39;</span>]</span>
+<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>     .nunique())</span>
+<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a>f <span class="op">=</span> (j[j[<span class="st">&#39;oid&#39;</span>] <span class="op">&lt;=</span> <span class="dv">100</span>]</span>
+<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a>     .groupby(<span class="st">&#39;oid&#39;</span>)</span>
+<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a>     .<span class="bu">filter</span>(<span class="kw">lambda</span> x: <span class="bu">all</span>(x[<span class="st">&#39;hid&#39;</span>] <span class="op">&gt;</span> <span class="dv">50</span>))</span>
+<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a>     [<span class="st">&#39;oid&#39;</span>]</span>
+<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a>     .nunique())</span>
+<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a>g <span class="op">=</span> <span class="bu">len</span>(<span class="bu">set</span>(h.index) <span class="op">-</span> <span class="bu">set</span>(j[<span class="st">&#39;hid&#39;</span>]))</span>
+<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a>i <span class="op">=</span> <span class="bu">len</span>(<span class="bu">set</span>(o.index) <span class="op">-</span> <span class="bu">set</span>(j[<span class="st">&#39;oid&#39;</span>]))</span></code></pre></div>
+<p>Write a <strong>single expression</strong> that evaluates to the
+number of rows in <code>whoa</code>. In your code, you may only use the
+variables <code>a</code>, <code>b</code>, <code>c</code>,
+<code>d</code>, <code>e</code>, <code>f</code>, <code>g</code>,
+<code>i</code> as defined above, arithmetic and bitwise operators
+(<code>+</code>, <code>-</code>, <code>/</code>, <code>*</code>,
+<code>&amp;</code>, <code>|</code>), and the <code>np.sum()</code>
+function. <strong>You may not use any other variables or
+functions.</strong> Your code might not need to use all of the variables
+defined above.</p>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading2_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse2_1" aria-expanded="true" aria-controls="collapse2_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse2_1" class="accordion-collapse collapse"
+aria-labelledby="heading2_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> <code>np.sum(a &amp; c) + f + i</code></p>
+<p>We know that <code>h</code> has the numbers 1-50 as unique integers
+in its index, and <code>o</code> has the numbers 1-100 as unique
+integers in its index. However, the <code>hid</code> and
+<code>oid</code> columns in <code>j</code> have values outside these
+ranges. To approach this problem, it’s easiest to come up with smaller
+versions of <code>h</code>, <code>j</code>, and <code>o</code>, then
+perform the join by hand. For example, consider the following example
+<code>h</code>, <code>j</code>, and <code>o</code> tables:</p>
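+<center>
+<table class="table table-bordered w-auto"><thead><tr><th>hid</th></tr></thead>
+<tbody>
+<tr><td>1</td></tr>
+<tr><td>2</td></tr>
+<tr><td>3</td></tr></tbody></table>
+</center>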
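+<center>
+<table class="table table-bordered w-auto"><thead><tr><th>hid</th><th>oid</th></tr></thead>
+<tbody>
+<tr><td>1</td><td>1</td></tr>
+<tr><td>2</td><td>1</td></tr>
+<tr><td>2</td><td>10</td></tr>
+<tr><td>2</td><td>11</td></tr>
+<tr><td>10</td><td>3</td></tr>
+<tr><td>11</td><td>3</td></tr></tbody></table>
+</center>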
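+<center>
+<table class="table table-bordered w-auto"><thead><tr><th>oid</th></tr></thead>
+<tbody>
+<tr><td>1</td></tr>
+<tr><td>2</td></tr>
+<tr><td>3</td></tr></tbody></table>
+</center>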
+<p>In this example, <code>whoa</code> would look like the following
+(omitting other columns besides <code>hid</code> and <code>oid</code>
+for brevity):</p>
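+<center>
+<table class="table table-bordered w-auto"><thead><tr><th>hid</th><th>oid</th></tr></thead>
+<tbody>
+<tr><td>1</td><td>1</td></tr>
+<tr><td>2</td><td>1</td></tr>
+<tr><td>NaN</td><td>2</td></tr>
+<tr><td>NaN</td><td>3</td></tr></tbody></table>
+</center>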
+<p>There are 3 cases where rows will be kept for <code>whoa</code>:</p>
+<ol type="1">
+<li>When both <code>hid</code> and <code>oid</code> match in the three
+tables (when <code>a</code> and <code>c</code> are both true). In the
+example above, this corresponds to the first two rows of
+<code>whoa</code>.</li>
+<li>When the <code>oid</code> in <code>o</code> doesn’t appear at all in
+<code>j</code> (calculated by <code>i</code>). In the example above,
+this corresponds to the third row of <code>whoa</code>.</li>
+<li>When the <code>oid</code> in <code>o</code> does appear in
+<code>j</code>, but none of the <code>hid</code> values appear in
+<code>h</code> (calculated by <code>f</code>). In the example above,
+this corresponds to the fourth row of <code>whoa</code>.</li>
+</ol>
+<p>Therefore, the number of rows in <code>whoa</code> is:</p>
+<div id="cb1" class="sourceCode">
+<pre class="sourceCode python"><code class="sourceCode python"><span id="cb1-1"><a aria-hidden="true" href="#cb1-1" tabindex="-1"></a>np.<span class="bu">sum</span>(a <span class="op">&amp;</span> c) <span class="op">+</span> f <span class="op">+</span> i</span></code></pre>
+</div>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="problem-3">Problem 3</h2>
+<p>Consider the following code which defines a DataFrame named
+<code>df</code>:</p>
+<div class="sourceCode" id="cb7"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> hour(df):       <span class="cf">return</span> df.assign(hour<span class="op">=</span>df[<span class="st">&#39;time&#39;</span>].dt.hour)</span>
+<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> is_morning(df): <span class="cf">return</span> df.assign(is_morning<span class="op">=</span>df[<span class="st">&#39;hour&#39;</span>] <span class="op">&lt;</span> <span class="dv">12</span>)</span>
+<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> (h.merge(j, left_index<span class="op">=</span><span class="va">True</span>, right_on<span class="op">=</span><span class="st">&#39;hid&#39;</span>, how<span class="op">=</span><span class="st">&#39;inner&#39;</span>)</span>
+<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>      .merge(o, left_on<span class="op">=</span><span class="st">&#39;oid&#39;</span>, right_index<span class="op">=</span><span class="va">True</span>, how<span class="op">=</span><span class="st">&#39;inner&#39;</span>)</span>
+<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>      .reset_index(drop<span class="op">=</span><span class="va">True</span>)</span>
+<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>      .pipe(hour)</span>
+<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>      .pipe(is_morning))</span></code></pre></div>
+<p>The first few rows of <code>df</code> are shown below.</p>
+<center><img src="../../assets/images/sp24-midterm/df.png" width=750></center>
+<p>Suppose we define a DataFrame <code>p</code> and functions
+<code>a</code>, <code>b</code>, <code>c</code>, and <code>d</code> as
+follows:</p>
+<div class="sourceCode" id="cb8"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>p <span class="op">=</span> df.pivot_table(index<span class="op">=</span><span class="st">&#39;street&#39;</span>, columns<span class="op">=</span><span class="st">&#39;hour&#39;</span>, values<span class="op">=</span><span class="st">&#39;duration&#39;</span>,</span>
+<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>                   aggfunc<span class="op">=</span><span class="st">&#39;count&#39;</span>, fill_value<span class="op">=</span><span class="dv">0</span>)</span>
+<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> a(n):    <span class="cf">return</span> p[n].<span class="bu">sum</span>()</span>
+<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> b(s):    <span class="cf">return</span> p.loc[s].<span class="bu">sum</span>()</span>
+<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> c():     <span class="cf">return</span> p.<span class="bu">sum</span>().<span class="bu">sum</span>()</span>
+<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> d(s, n): <span class="cf">return</span> p.loc[s, n]</span></code></pre></div>
+<p>Write a single expression to compute each of the probabilities below.
+<strong>Your code can only use the functions <code>a</code>,
+<code>b</code>, <code>c</code>, <code>d</code>, and arithmetic operators
+(<code>+</code>, <code>-</code>, <code>/</code>,
+<code>*</code>).</strong></p>
+<p><br></p>
+<h3 id="problem-3.1">Problem 3.1</h3>
+<p>The probability that a randomly selected row from <code>df</code> has
+the street <code>Mission Blvd</code>.</p>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading3_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse3_1" aria-expanded="true" aria-controls="collapse3_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse3_1" class="accordion-collapse collapse"
+aria-labelledby="heading3_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> <code>b('Mission Blvd') / c()</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-3.2">Problem 3.2</h3>
+<p>The probability that a randomly selected row from <code>df</code> has
+the street <code>Gilman Dr</code> given that its hour is
+<code>21</code>.</p>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading3_2">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse3_2" aria-expanded="true" aria-controls="collapse3_2">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse3_2" class="accordion-collapse collapse"
+aria-labelledby="heading3_2" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> <code>d('Gilman Dr', 21) / a(21)</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-3.3">Problem 3.3</h3>
+<p>The probability that a randomly selected row from <code>df</code>
+either has the street <code>Mission Blvd</code> or the hour
+<code>12</code>.</p>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading3_3">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse3_3" aria-expanded="true" aria-controls="collapse3_3">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse3_3" class="accordion-collapse collapse"
+aria-labelledby="heading3_3" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong>
+<code>(b('Mission Blvd') + a(12) - d('Mission Blvd', 12)) / c()</code></p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="problem-4">Problem 4</h2>
+<p><br></p>
+<h3 id="problem-4.1">Problem 4.1</h3>
+<p>Consider the following pivot table created using the <code>df</code>
+table from Problem 3, which shows the average duration of power outages
+split by street name and whether the outage happened before 12pm.</p>
+<center><img src="../../assets/images/sp24-midterm/q4a.png" width=750></center>
+<p>Given only the information in this pivot table and the Reference
+Sheet, is it possible to observe Simpson’s paradox for this data if we
+don’t split by street? In other words, is it possible that the average
+duration of power outages before 12pm is lower than the average duration
+of power outages after 12pm?</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> Yes</p></li>
+<li><p><input type="radio" disabled="" /> No</p></li>
+<li><p><input type="radio" disabled="" /> Need more information to determine</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading4_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse4_1" aria-expanded="true" aria-controls="collapse4_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse4_1" class="accordion-collapse collapse"
+aria-labelledby="heading4_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> Yes</p>
+<p>Notice that the overall average of the durations when
+<code>is_morning=True</code> is a weighted average of the values in the
+<code>is_morning=True</code> column of the pivot table. This means that
+the overall average when <code>is_morning=True</code> must be between
+(44.93, 59.29). Likewise, the overall average when
+<code>is_morning=False</code> must be between (40.62, 52.78). This
+implies that it’s possible for Simpson’s paradox to happen, since the
+overall average when <code>is_morning=False</code> can be higher than
+the average when <code>is_morning=True</code>.</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-4.2">Problem 4.2</h3>
+<p>Consider the following pivot table created using the <code>o</code>
+table, which shows the average duration of power outages split by
+whether the outage happened on the weekend and whether the outage
+happened before 12pm.</p>
+<center><img src="../../assets/images/sp24-midterm/q4b.png" width=750></center>
+<p>Given only the information in this pivot table and the Reference
+Sheet, is it possible to observe Simpson’s paradox for this data if we
+don’t split by <code>is_weekend</code>? In other words, is it possible
+that the average duration of power outages before 12pm is lower than the
+average duration of power outages after 12pm?</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> Yes</p></li>
+<li><p><input type="radio" disabled="" /> No</p></li>
+<li><p><input type="radio" disabled="" /> Need more information to determine</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading4_2">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse4_2" aria-expanded="true" aria-controls="collapse4_2">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse4_2" class="accordion-collapse collapse"
+aria-labelledby="heading4_2" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> No</p>
+<p>By the same logic as the previous part, the overall average when
+<code>is_morning=True</code> must be between (53.09, 58.64). The overall
+average when <code>is_morning=False</code> must be between (43.40,
+51.67). This implies that Simpson’s paradox cannot happen, since the
+overall average when <code>is_morning=False</code> will never be greater
+than the overall average when <code>is_morning=True</code>.</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="problem-5">Problem 5</h2>
+<p>Praveen wants to answer the following questions using hypothesis
+tests on the power outages data, so he adds <code>hour</code> and
+<code>is_morning</code> columns to the <code>o</code> DataFrame. The
+first few rows of the new <code>o</code> DataFrame are shown below. For
+this problem, assume that some of the <code>duration</code> values are
+missing.</p>
+<center><img src="../../assets/images/sp24-midterm/q5.png" width=750></center>
+<p>For each test, select the <strong>one</strong> correct procedure to
+simulate a single sample under the null hypothesis, and select
+<strong>all</strong> test statistics that can be used for the hypothesis
+test among the choices given.</p>
+<p><br></p>
+<h3 id="problem-5.1">Problem 5.1</h3>
+<p>Null Hypothesis: Every hour of the day (0, 1, 2, etc.) has an equal
+probability of having a power outage.</p>
+<p>Alternative Hypothesis: At least one hour is more prone to outages
+than others.</p>
+<p><strong>Simulation procedure</strong>:</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/2] * 2)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/24] * 24)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>o['hour'].sample(100)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.permutation(o['duration'])</code></p></li>
+</ul>
+<p><strong>Test statistic</strong>:</p>
+<ul class="task-list">
+<li><p><input type="checkbox" disabled="" /> Difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Absolute difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Total variation distance</p></li>
+<li><p><input type="checkbox" disabled="" /> K-S test statistic</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading5_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse5_1" aria-expanded="true" aria-controls="collapse5_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse5_1" class="accordion-collapse collapse"
+aria-labelledby="heading5_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>Simulation procedure:
+<code>np.random.multinomial(100, [1/24] * 24)</code></p>
+<p>Test statistic: Total variation distance, K-S test statistic</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-5.2">Problem 5.2</h3>
+<p>Null: The proportion of outages that happen in the morning is the
+same for both recorded durations and missing durations.</p>
+<p>Alternative: The outages are more likely to happen in the morning for
+missing durations than for recorded durations.</p>
+<p><strong>Simulation procedure</strong>:</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/2] * 2)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/24] * 24)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>o['hour'].sample(100)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.permutation(o['duration'])</code></p></li>
+</ul>
+<p><strong>Test statistic</strong>:</p>
+<ul class="task-list">
+<li><p><input type="checkbox" disabled="" /> Difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Absolute difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Total variation distance</p></li>
+<li><p><input type="checkbox" disabled="" /> K-S test statistic</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading5_2">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse5_2" aria-expanded="true" aria-controls="collapse5_2">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse5_2" class="accordion-collapse collapse"
+aria-labelledby="heading5_2" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>Simulation procedure:
+<code>np.random.permutation(o['duration'])</code></p>
+<p>Test statistic: Difference in means</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-5.3">Problem 5.3</h3>
+<p>Null: The distribution of hours is the same for both recorded
+durations and missing durations.</p>
+<p>Alternative: The distribution of hours is different for recorded
+durations and missing durations.</p>
+<p><strong>Simulation procedure</strong>:</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/2] * 2)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.multinomial(100, [1/24] * 24)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>o['hour'].sample(100)</code></p></li>
+<li><p><input type="radio" disabled="" /> <code>np.random.permutation(o['duration'])</code></p></li>
+</ul>
+<p><strong>Test statistic</strong>:</p>
+<ul class="task-list">
+<li><p><input type="checkbox" disabled="" /> Difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Absolute difference in means</p></li>
+<li><p><input type="checkbox" disabled="" /> Total variation distance</p></li>
+<li><p><input type="checkbox" disabled="" /> K-S test statistic</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading5_3">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse5_3" aria-expanded="true" aria-controls="collapse5_3">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse5_3" class="accordion-collapse collapse"
+aria-labelledby="heading5_3" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong></p>
+<p>Simulation procedure:
+<code>np.random.permutation(o['duration'])</code></p>
+<p>Test statistic: Absolute difference in means, Total variation
+distance, K-S test statistic</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="problem-6">Problem 6</h2>
+<p>After loading in the DataFrame <code>df</code> from Problem 3, Sam
+realizes that his puppy Bentley ate some of his data! The first few rows
+of <code>df</code> are shown below for convenience.</p>
+<center><img src="../../assets/images/sp24-midterm/df.png" width=750></center>
+<p><br></p>
+<h3 id="problem-6.1">Problem 6.1</h3>
+<p>Suppose that Sam sorted <code>df</code> by <code>is_morning</code>,
+and then Bentley ate the first five values from the
+<code>duration</code> column. What is the missingness mechanism for the
+<code>duration</code> column?</p>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> Missing by design</p></li>
+<li><p><input type="radio" disabled="" /> MNAR</p></li>
+<li><p><input type="radio" disabled="" /> MAR on <code>is_morning</code> only</p></li>
+<li><p><input type="radio" disabled="" /> MAR on <code>is_morning</code> and <code>hour</code> only</p></li>
+<li><p><input type="radio" disabled="" /> MAR on <code>is_morning</code>, <code>hour</code>, and
+<code>time</code> only</p></li>
+<li><p><input type="radio" disabled="" /> MCAR</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading6_1">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse6_1" aria-expanded="true" aria-controls="collapse6_1">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse6_1" class="accordion-collapse collapse"
+aria-labelledby="heading6_1" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> MAR on <code>is_morning</code>,
+<code>hour</code>, and <code>time</code> only</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<p><br></p>
+<h3 id="problem-6.2">Problem 6.2</h3>
+<p>Sam believes that the data are MAR on <code>hour</code> only, so he
+decides to use probabilistic imputation to fill in the missing values.
+He uses the following code copied from Lecture 8 (line numbers shown in
+parentheses):</p>
+<div class="sourceCode" id="cb9"><pre
+class="sourceCode python"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>(<span class="dv">1</span>)  <span class="kw">def</span> impute(s):</span>
+<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>(<span class="dv">2</span>)      s <span class="op">=</span> s.copy()</span>
+<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>(<span class="dv">3</span>)      n <span class="op">=</span> s.isna().<span class="bu">sum</span>()</span>
+<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>(<span class="dv">4</span>)      fill <span class="op">=</span> np.random.choice(s.dropna(), n)</span>
+<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a>(<span class="dv">5</span>)      s[s.isna()] <span class="op">=</span> fill</span>
+<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>(<span class="dv">6</span>)      <span class="cf">return</span> s</span>
+<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a>(<span class="dv">7</span>)  df.groupby(<span class="st">&#39;hour&#39;</span>)[<span class="st">&#39;duration&#39;</span>].transform(impute)</span></code></pre></div>
+<ol type="1">
+<li>Even though this code is copied from lecture, it can raise an error
+on Sam’s data if a certain condition is met. Which of these, if true,
+would cause the code to error?</li>
+</ol>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> The missing values in <code>duration</code> are actually NMAR.</p></li>
+<li><p><input type="radio" disabled="" /> The missing values in <code>duration</code> are actually MAR on
+<code>street</code>, not <code>hour</code>.</p></li>
+<li><p><input type="radio" disabled="" /> There are no missing values in <code>duration</code>.</p></li>
+<li><p><input type="radio" disabled="" /> At least one <code>hour</code> value doesn’t have any missing
+<code>duration</code> values.</p></li>
+<li><p><input type="radio" disabled="" /> At least one <code>hour</code> value only has missing
+<code>duration</code> values.</p></li>
+<li><p><input type="radio" disabled="" /> There are no rows where <code>hour == 12</code>.</p></li>
+</ul>
+<ol start="2" type="1">
+<li>Which line in the code would raise the error?</li>
+</ol>
+<ul class="task-list">
+<li><p><input type="radio" disabled="" /> Line 1</p></li>
+<li><p><input type="radio" disabled="" /> Line 2</p></li>
+<li><p><input type="radio" disabled="" /> Line 3</p></li>
+<li><p><input type="radio" disabled="" /> Line 4</p></li>
+<li><p><input type="radio" disabled="" /> Line 5</p></li>
+<li><p><input type="radio" disabled="" /> Line 6</p></li>
+<li><p><input type="radio" disabled="" /> Line 7</p></li>
+</ul>
+<div id="accordionExample" class="accordion">
+<div class="accordion-item">
+<h2 class="accordion-header" id="heading6_2">
+<button class="accordion-button" type="button" data-bs-toggle="collapse" data-bs-target="#collapse6_2" aria-expanded="true" aria-controls="collapse6_2">
+Click to view the solution.
+</button>
+</h2>
+<div id="collapse6_2" class="accordion-collapse collapse"
+aria-labelledby="heading6_2" data-bs-parent="#accordionExample">
+<div class="accordion-body">
+<header id="title-block-header">
+<h1 class="title"> </h1>
+</header>
+<p><strong>Answer:</strong> 1. At least one <code>hour</code> value only
+has missing <code>duration</code> values.</p>
+<p><strong>Answer:</strong> 2. Line 4</p>
+</div>
+</div>
+</div>
+</div>
+<p><br></p>
+<hr />
+<h2 id="section"><span class="math display"> </span></h2>
+<h4
+id="feedback-find-an-error-still-confused-have-a-suggestion-let-us-know-here.">👋
+Feedback: Find an error? Still confused? Have a suggestion?
+<a href="https://forms.gle/WZ71FchnXU1K154d7">Let us know
+here</a>.</h4>
+<hr />
+</body>
+</html>
diff --git a/pages/exams/sp24-midterm.yml b/pages/exams/sp24-midterm.yml
new file mode 100644
index 0000000..27dbb80
--- /dev/null
+++ b/pages/exams/sp24-midterm.yml
@@ -0,0 +1,11 @@
+title: 'Spring 2024 Midterm Exam'
+instructors: Sam Lau
+context: This exam was administered in-person. The exam was closed-notes, except students were allowed to bring a single two-sided notes sheet. No calculators were allowed. Students had **80 minutes** to take this exam.
+show_solution: true
+problems:
+  - sp24-midterm/sp24-mid-q01
+  - sp24-midterm/sp24-mid-q02
+  - sp24-midterm/sp24-mid-q03
+  - sp24-midterm/sp24-mid-q04
+  - sp24-midterm/sp24-mid-q05
+  - sp24-midterm/sp24-mid-q06
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q01.md b/problems/sp24-midterm/sp24-mid-q01.md
new file mode 100644
index 0000000..e61f054
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q01.md
@@ -0,0 +1,111 @@
+# BEGIN PROB
+
+Fill in Python code below so that the last line of each part evaluates to each desired result using the tables `h`, `o`, and `j` as shown on the Reference Sheet.
+
+# BEGIN SUBPROB
+
+Find the median duration of outages that happened in the early morning (before 8am).
+
+```python
+o.loc[__(a)__,__(b)__].median()
+```
+
+# BEGIN SOLN
+
+**Answer:** 
+
+(a): `o['time'].dt.hour < 8`
+
+(b): `'duration'`
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+A Series containing the mean outage duration for outages that happened on the weekend and outages that happened on weekdays.
+
+*Hint: If `s` is a Series of timestamps, `s.dt.dayofweek` returns a Series of integers where 0 is Monday and 6 is Sunday.*
+
+```python
+(o.assign(__(a)__)
+.groupby(__(b)__)[__(c)__].mean())
+```
+
+# BEGIN SOLN
+
+**Answer:** 
+
+(a): `is_weekend=o['time'].dt.dayofweek >= 5`
+
+(b): `'is_weekend'`, (c): `'duration'`
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+A DataFrame containing the proportion of 4-digit address numbers for each unique street in `h`.
+
+```python
+def foo(x):
+    lengths = __(a)__
+    return (lengths == 4).mean()
+
+h.groupby(__(b)__).__(c)__(foo)
+```
+
+# BEGIN SOLN
+
+**Answer:** 
+
+(a): `x.astype(str).str.len()`
+
+(b): `'street'`
+
+(c): `agg`
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+What does the following code compute?
+
+```python
+a = h.merge(j, left_index=True, right_on='hid', how='left')
+a.loc[a['oid'].isna(), 'hid'].shape[0]
+```
+
+( ) The number of addresses with exactly one outage.  
+( ) The number of addresses with at least one outage.  
+( ) The number of addresses with no outages.  
+( ) The total number of addresses affected by all power outages.  
+( ) The number of power outages.  
+( ) The number of power outages that affected exactly one address.  
+( ) The number of power outages that affected at least one address.  
+( ) The number of power outages that affected no addresses.  
+( ) 0  
+( ) The code will raise an error.  
+( ) None of the above.  
+
+
+
+# BEGIN SOLN
+
+**Answer:** The number of addresses with no outages.
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q02.md b/problems/sp24-midterm/sp24-mid-q02.md
new file mode 100644
index 0000000..be5a825
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q02.md
@@ -0,0 +1,97 @@
+# BEGIN PROB
+
+# BEGIN SUBPROB
+
+Consider the following code:
+
+```python
+whoa = (h.merge(j, left_index=True, right_on='hid', how='left')
+        .merge(o, left_on='oid', right_index=True, how='right')
+        .reset_index(drop=True))
+```
+
+Consider the following variables:
+
+```python
+a = j['hid'] <= 50
+b = j['hid'] > 50
+c = j['oid'] <= 100
+d = j['oid'] > 100
+e = (j[j['hid'] <= 50]
+     .groupby('hid')
+     .filter(lambda x: all(x['oid'] > 100))
+     ['hid']
+     .nunique())
+f = (j[j['oid'] <= 100]
+     .groupby('oid')
+     .filter(lambda x: all(x['hid'] > 50))
+     ['oid']
+     .nunique())
+g = len(set(h.index) - set(j['hid']))
+i = len(set(o.index) - set(j['oid']))
+```
+
+Write a **single expression** that evaluates to the number of rows in `whoa`. In your code, you may only use the variables `a`, `b`, `c`, `d`, `e`, `f`, `g`, `i` as defined above, arithmetic and bitwise operators (`+`, `-`, `/`, `*`, `&`, `|`), and the `np.sum()` function. **You may not use any other variables or functions.** Your code might not need to use all of the variables defined above.
+
+
+# BEGIN SOLN
+
+**Answer:** `np.sum(a & c) + f + i`
+
+We know that `h` has the numbers 1-50 as unique integers in its index, and `o` has the numbers 1-100 as unique integers in its index. However, the `hid` and `oid` columns in `j` have values outside these ranges. To approach this problem, it's easiest to come up with smaller versions of `h`, `j`, and `o`, then perform the join by hand. For example, consider the following example `h`, `j`, and `o` tables:
+
+<center>
+| **hid** |
+|---------|
+| 1       |
+| 2       |
+| 3       |
+</center>
+
+<center>
+| **hid** | **oid** |
+|---------|---------|
+| 1       | 1       |
+| 2       | 1       |
+| 2       | 10      |
+| 2       | 11      |
+| 10      | 3       |
+| 11      | 3       |
+</center>
+
+<center>
+| **oid** |
+|---------|
+| 1       |
+| 2       |
+| 3       |
+</center>
+
+In this example, `whoa` would look like the following (omitting other columns besides `hid` and `oid` for brevity):
+
+<center>
+| **hid** | **oid** |
+|---------|---------|
+| 1       | 1       |
+| 2       | 1       |
+| NaN     | 2       |
+| NaN     | 3       |
+</center>
+
+There are 3 cases where rows will be kept for `whoa`:
+
+1. When both `hid` and `oid` match in the three tables (when `a` and `c` are both true). In the example above, this corresponds to the first two rows of `whoa`.
+2. When the `oid` in `o` doesn't appear at all in `j` (calculated by `i`). In the example above, this corresponds to the third row of `whoa`.
+3. When the `oid` in `o` does appear in `j`, but none of the `hid` values appear in `h` (calculated by `f`). In the example above, this corresponds to the fourth row of `whoa`.
+
+Therefore, the number of rows in `whoa` is:
+
+```python
+np.sum(a & c) + f + i
+```
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q03.md b/problems/sp24-midterm/sp24-mid-q03.md
new file mode 100644
index 0000000..f9c004b
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q03.md
@@ -0,0 +1,74 @@
+# BEGIN PROB
+
+Consider the following code which defines a DataFrame named `df`:
+
+```python
+def hour(df):       return df.assign(hour=df['time'].dt.hour)
+def is_morning(df): return df.assign(is_morning=df['hour'] < 12)
+
+df = (h.merge(j, left_index=True, right_on='hid', how='inner')
+      .merge(o, left_on='oid', right_index=True, how='inner')
+      .reset_index(drop=True)
+      .pipe(hour)
+      .pipe(is_morning))
+```
+
+The first few rows of `df` are shown below.
+
+<center><img src="../../assets/images/sp24-midterm/df.png" width=750></center>
+
+Suppose we define a DataFrame `p` and functions `a`, `b`, `c`, and `d` as follows:
+
+```python
+p = df.pivot_table(index='street', columns='hour', values='duration',
+                   aggfunc='count', fill_value=0)
+
+def a(n):    return p[n].sum()
+def b(s):    return p.loc[s].sum()
+def c():     return p.sum().sum()
+def d(s, n): return p.loc[s, n]
+```
+
+Write a single expression to compute each of the probabilities below. **Your code can only use the functions `a`, `b`, `c`, `d`, and arithmetic operators (`+`, `-`, `/`, `*`).**
+
+# BEGIN SUBPROB
+
+The probability that a randomly selected row from `df` has the street `Mission Blvd`.
+
+# BEGIN SOLN
+
+**Answer:** `b('Mission Blvd') / c()`
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+The probability that a randomly selected row from `df` has the street `Gilman Dr` given that its hour is `21`.
+
+# BEGIN SOLN
+
+**Answer:** `d('Gilman Dr', 21) / a(21)`
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+The probability that a randomly selected row from `df` either has the street `Mission Blvd` or the hour `12`.
+
+# BEGIN SOLN
+
+**Answer:** `(b('Mission Blvd') + a(12) - d('Mission Blvd', 12)) / c()`
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q04.md b/problems/sp24-midterm/sp24-mid-q04.md
new file mode 100644
index 0000000..ca9cd29
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q04.md
@@ -0,0 +1,51 @@
+# BEGIN PROB
+
+# BEGIN SUBPROB
+
+Consider the following pivot table created using the `df` table from Problem 3, which shows the average duration of power outages split by street name and whether the outage happened before 12pm.
+
+<center><img src="../../assets/images/sp24-midterm/q4a.png" width=750></center>
+
+Given only the information in this pivot table and the Reference Sheet, is it possible to observe Simpson's paradox for this data if we don't split by street? In other words, is it possible that the average duration of power outages before 12pm is lower than the average duration of power outages after 12pm?
+
+( ) Yes
+( ) No
+( ) Need more information to determine
+
+# BEGIN SOLN
+
+**Answer:** Yes
+
+Notice that the overall average of the durations when `is_morning=True` is a weighted average of the values in the `is_morning=True` column of the pivot table. This means that the overall average when `is_morning=True` must be between (44.93, 59.29). Likewise, the overall average when `is_morning=False` must be between (40.62, 52.78). This implies that it's possible for Simpson's paradox to happen, since the overall average when `is_morning=False` can be higher than the average when `is_morning=True`.
+
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+Consider the following pivot table created using the `o` table, which shows the average duration of power outages split by whether the outage happened on the weekend and whether the outage happened before 12pm.
+
+<center><img src="../../assets/images/sp24-midterm/q4b.png" width=750></center>
+
+Given only the information in this pivot table and the Reference Sheet, is it possible to observe Simpson's paradox for this data if we don't split by `is_weekend`? In other words, is it possible that the average duration of power outages before 12pm is lower than the average duration of power outages after 12pm?
+
+( ) Yes
+( ) No
+( ) Need more information to determine
+
+# BEGIN SOLN
+
+**Answer:** No
+
+By the same logic as the previous part, the overall average when `is_morning=True` must be between (53.09, 58.64). The overall average when `is_morning=False` must be between (43.40, 51.67). This implies that Simpson's paradox cannot happen, since the overall average when `is_morning=False` will never be greater than the overall average when `is_morning=True`.
+
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q05.md b/problems/sp24-midterm/sp24-mid-q05.md
new file mode 100644
index 0000000..9d5903e
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q05.md
@@ -0,0 +1,113 @@
+# BEGIN PROB
+
+Praveen wants to answer the following questions using hypothesis tests on the power outages data, so he adds `hour` and `is_morning` columns to the `o` DataFrame. The first few rows of the new `o` DataFrame are shown below. For this problem, assume that some of the `duration` values are missing.
+
+<center><img src="../../assets/images/sp24-midterm/q5.png" width=750></center>
+
+For each test, select the **one** correct procedure to simulate a single sample under the null hypothesis, and select **all** test statistics that can be used for the hypothesis test among the choices given.
+
+# BEGIN SUBPROB
+
+Null Hypothesis: Every hour of the day (0, 1, 2, etc.) has an equal probability of having a power outage.
+
+Alternative Hypothesis: At least one hour is more prone to outages than others.
+
+**Simulation procedure**:
+
+( ) `np.random.multinomial(100, [1/2] * 2)`  
+( ) `np.random.multinomial(100, [1/24] * 24)`  
+( ) `o['hour'].sample(100)`  
+( ) `np.random.permutation(o['duration'])`
+
+
+**Test statistic**:
+
+[ ] Difference in means  
+[ ] Absolute difference in means  
+[ ] Total variation distance  
+[ ] K-S test statistic
+
+
+# BEGIN SOLN
+
+**Answer:** 
+
+Simulation procedure: `np.random.multinomial(100, [1/24] * 24)`
+
+Test statistic: Total variation distance, K-S test statistic
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+Null: The proportion of outages that happen in the morning is the same for both recorded durations and missing durations.
+
+Alternative: The outages are more likely to happen in the morning for missing durations than for recorded durations.
+
+**Simulation procedure**:
+
+( ) `np.random.multinomial(100, [1/2] * 2)`  
+( ) `np.random.multinomial(100, [1/24] * 24)`  
+( ) `o['hour'].sample(100)`  
+( ) `np.random.permutation(o['duration'])`
+
+
+**Test statistic**:
+
+[ ] Difference in means  
+[ ] Absolute difference in means  
+[ ] Total variation distance  
+[ ] K-S test statistic
+
+# BEGIN SOLN
+
+**Answer:** 
+
+Simulation procedure: `np.random.permutation(o['duration'])`
+
+Test statistic: Difference in means
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+Null: The distribution of hours is the same for both recorded durations and missing durations.
+
+Alternative: The distribution of hours is different for recorded durations and missing durations.
+
+**Simulation procedure**:
+
+( ) `np.random.multinomial(100, [1/2] * 2)`  
+( ) `np.random.multinomial(100, [1/24] * 24)`  
+( ) `o['hour'].sample(100)`  
+( ) `np.random.permutation(o['duration'])`
+
+
+**Test statistic**:
+
+[ ] Difference in means  
+[ ] Absolute difference in means  
+[ ] Total variation distance  
+[ ] K-S test statistic
+
+# BEGIN SOLN
+
+**Answer:** 
+
+Simulation procedure: `np.random.permutation(o['duration'])`
+
+Test statistic: Absolute difference in means, Total variation distance, K-S test statistic
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-mid-q06.md b/problems/sp24-midterm/sp24-mid-q06.md
new file mode 100644
index 0000000..e06606b
--- /dev/null
+++ b/problems/sp24-midterm/sp24-mid-q06.md
@@ -0,0 +1,74 @@
+# BEGIN PROB
+
+After loading in the DataFrame `df` from the pivoting question earlier in the exam, Sam realizes that his puppy Bentley ate some of his data! The first few rows of `df` are shown below for convenience.
+
+<center><img src="../../assets/images/sp24-midterm/df.png" width=750></center>
+
+# BEGIN SUBPROB
+
+Suppose that Sam sorted `df` by `is_morning`, and then Bentley ate the first five values from the `duration` column. What is the missingness mechanism for the `duration` column?
+
+( ) Missing by design  
+( ) MNAR  
+( ) MAR on `is_morning` only  
+( ) MAR on `is_morning` and `hour` only  
+( ) MAR on `is_morning`, `hour`, and `time` only  
+( ) MCAR
+
+# BEGIN SOLN
+
+**Answer:** MAR on `is_morning`, `hour`, and `time` only  
+
+# END SOLN
+
+# END SUBPROB
+
+
+
+# BEGIN SUBPROB
+
+Sam believes that the data are MAR on `hour` only, so he decides to use probabilistic imputation to fill in the missing values. He uses the following code copied from Lecture 8 (line numbers shown in parentheses):
+
+```python
+(1)  def impute(s):
+(2)      s = s.copy()
+(3)      n = s.isna().sum()
+(4)      fill = np.random.choice(s.dropna(), n)
+(5)      s[s.isna()] = fill
+(6)      return s
+(7)  df.groupby('hour')['duration'].transform(impute)
+```
+
+1.  Even though this code is copied from lecture, it can raise an error on Sam’s data if a certain condition is met. Which of these, if true, would cause the code to error?
+
+( ) The missing values in `duration` are actually MNAR.  
+( ) The missing values in `duration` are actually MAR on `street`, not `hour`.  
+( ) There are no missing values in `duration`.  
+( ) At least one `hour` value doesn't have any missing `duration` values.  
+( ) At least one `hour` value only has missing `duration` values.  
+( ) There are no rows where `hour == 12`.
+
+2.  Which line in the code would raise the error?
+
+( ) Line 1  
+( ) Line 2  
+( ) Line 3  
+( ) Line 4  
+( ) Line 5  
+( ) Line 6  
+( ) Line 7
+
+
+# BEGIN SOLN
+
+**Answer:** 
+1. At least one `hour` value only has missing `duration` values.
+
+**Answer:** 
+2. Line 4
+
+# END SOLN
+
+# END SUBPROB
+
+# END PROB
\ No newline at end of file
diff --git a/problems/sp24-midterm/sp24-midterm-data-info.md b/problems/sp24-midterm/sp24-midterm-data-info.md
new file mode 100644
index 0000000..2c3a214
--- /dev/null
+++ b/problems/sp24-midterm/sp24-midterm-data-info.md
@@ -0,0 +1,20 @@
+The `h` table records addresses within San Diego. Only 50 addresses are recorded. The index of the dataframe contains the numbers 1-50 as unique integers.
+
+- `"number" (int)`: Street address number
+- `"street" (str)`: Street name
+
+<center><img src="../../assets/images/sp24-midterm/h.png" width=750></center>
+
+The `o` table records information on power outages within San Diego in April 2024. Only 100 outages are recorded. The index of the dataframe contains the numbers 1-100 as unique integers.
+
+- `"time" (pd.Timestamp)`: When the outage began
+- `"duration" (int)`: How long the outage lasted in minutes
+
+<center><img src="../../assets/images/sp24-midterm/o.png" width=750></center>
+
+The `j` table is a table that links outages to addresses. Each entry in the `j` table contains the `hid` of the affected address and the `oid` of the outage. For example, the first row of the table records that the outage with an `oid` of 1 caused the power to go out at addresses with `hid` 61 and 88. A single outage can affect multiple addresses. There are no missing values and no duplicated rows in this table, and all values are positive integers. This table records all the addresses affected by all of the outages in 2024 so far.
+
+- `"hid" (int)`: The `hid` of the affected address
+- `"oid" (int)`: The `oid` of the outage
+
+<center><img src="../../assets/images/sp24-midterm/j.png" width=750></center>
\ No newline at end of file