diff --git a/sql_I/data/example_duck.db.wal b/docs/.nojekyll similarity index 100% rename from sql_I/data/example_duck.db.wal rename to docs/.nojekyll diff --git a/case_study_HCE/case_study_HCE.html b/docs/case_study_HCE/case_study_HCE.html similarity index 100% rename from case_study_HCE/case_study_HCE.html rename to docs/case_study_HCE/case_study_HCE.html diff --git a/docs/case_study_HCE/images/data_life_cycle.PNG b/docs/case_study_HCE/images/data_life_cycle.PNG new file mode 100644 index 000000000..aef5d21de Binary files /dev/null and b/docs/case_study_HCE/images/data_life_cycle.PNG differ diff --git a/docs/case_study_HCE/images/vis_1.png b/docs/case_study_HCE/images/vis_1.png new file mode 100644 index 000000000..a9ecac7b3 Binary files /dev/null and b/docs/case_study_HCE/images/vis_1.png differ diff --git a/docs/case_study_HCE/images/vis_10.png b/docs/case_study_HCE/images/vis_10.png new file mode 100644 index 000000000..61daefb9d Binary files /dev/null and b/docs/case_study_HCE/images/vis_10.png differ diff --git a/docs/case_study_HCE/images/vis_2.png b/docs/case_study_HCE/images/vis_2.png new file mode 100644 index 000000000..db39da9e0 Binary files /dev/null and b/docs/case_study_HCE/images/vis_2.png differ diff --git a/docs/case_study_HCE/images/vis_3.jpg b/docs/case_study_HCE/images/vis_3.jpg new file mode 100644 index 000000000..72e645396 Binary files /dev/null and b/docs/case_study_HCE/images/vis_3.jpg differ diff --git a/docs/case_study_HCE/images/vis_4.png b/docs/case_study_HCE/images/vis_4.png new file mode 100644 index 000000000..472809dfc Binary files /dev/null and b/docs/case_study_HCE/images/vis_4.png differ diff --git a/docs/case_study_HCE/images/vis_5.png b/docs/case_study_HCE/images/vis_5.png new file mode 100644 index 000000000..74853eb27 Binary files /dev/null and b/docs/case_study_HCE/images/vis_5.png differ diff --git a/docs/case_study_HCE/images/vis_6.png b/docs/case_study_HCE/images/vis_6.png new file mode 100644 index 000000000..60d63cfb5 
Binary files /dev/null and b/docs/case_study_HCE/images/vis_6.png differ diff --git a/docs/case_study_HCE/images/vis_7.png b/docs/case_study_HCE/images/vis_7.png new file mode 100644 index 000000000..ed490433d Binary files /dev/null and b/docs/case_study_HCE/images/vis_7.png differ diff --git a/docs/case_study_HCE/images/vis_8.png b/docs/case_study_HCE/images/vis_8.png new file mode 100644 index 000000000..e2ebc46b4 Binary files /dev/null and b/docs/case_study_HCE/images/vis_8.png differ diff --git a/docs/case_study_HCE/images/vis_9.png b/docs/case_study_HCE/images/vis_9.png new file mode 100644 index 000000000..aab375803 Binary files /dev/null and b/docs/case_study_HCE/images/vis_9.png differ diff --git a/docs/constant_model_loss_transformations/images/bulge.png b/docs/constant_model_loss_transformations/images/bulge.png new file mode 100644 index 000000000..aee1d745e Binary files /dev/null and b/docs/constant_model_loss_transformations/images/bulge.png differ diff --git a/docs/constant_model_loss_transformations/images/constant_loss_surface.png b/docs/constant_model_loss_transformations/images/constant_loss_surface.png new file mode 100644 index 000000000..1cd733bd8 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/constant_loss_surface.png differ diff --git a/docs/constant_model_loss_transformations/images/dugong_rug.png b/docs/constant_model_loss_transformations/images/dugong_rug.png new file mode 100644 index 000000000..9c5e9df67 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/dugong_rug.png differ diff --git a/docs/constant_model_loss_transformations/images/dugong_scatter.png b/docs/constant_model_loss_transformations/images/dugong_scatter.png new file mode 100644 index 000000000..4bf3a8b06 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/dugong_scatter.png differ diff --git a/docs/constant_model_loss_transformations/images/error.png 
b/docs/constant_model_loss_transformations/images/error.png new file mode 100644 index 000000000..f37677abb Binary files /dev/null and b/docs/constant_model_loss_transformations/images/error.png differ diff --git a/docs/constant_model_loss_transformations/images/mae_loss_infinite.png b/docs/constant_model_loss_transformations/images/mae_loss_infinite.png new file mode 100644 index 000000000..2bd5e9e07 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/mae_loss_infinite.png differ diff --git a/docs/constant_model_loss_transformations/images/mse_loss_26.png b/docs/constant_model_loss_transformations/images/mse_loss_26.png new file mode 100644 index 000000000..7c39cc767 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/mse_loss_26.png differ diff --git a/docs/constant_model_loss_transformations/images/outliers.png b/docs/constant_model_loss_transformations/images/outliers.png new file mode 100644 index 000000000..61f295ddb Binary files /dev/null and b/docs/constant_model_loss_transformations/images/outliers.png differ diff --git a/docs/constant_model_loss_transformations/images/slr_loss_surface.png b/docs/constant_model_loss_transformations/images/slr_loss_surface.png new file mode 100644 index 000000000..66320e5d9 Binary files /dev/null and b/docs/constant_model_loss_transformations/images/slr_loss_surface.png differ diff --git a/docs/constant_model_loss_transformations/images/slr_modeling.png b/docs/constant_model_loss_transformations/images/slr_modeling.png new file mode 100644 index 000000000..c51158f5f Binary files /dev/null and b/docs/constant_model_loss_transformations/images/slr_modeling.png differ diff --git a/constant_model_loss_transformations/loss_transformations.html b/docs/constant_model_loss_transformations/loss_transformations.html similarity index 100% rename from constant_model_loss_transformations/loss_transformations.html rename to 
docs/constant_model_loss_transformations/loss_transformations.html diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-10-output-2.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-10-output-2.png new file mode 100644 index 000000000..2c87cfcca Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-10-output-2.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-12-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-12-output-1.png new file mode 100644 index 000000000..2d1655c25 Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-12-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-13-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-13-output-1.png new file mode 100644 index 000000000..d75aaf3b2 Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-13-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-17-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-17-output-1.png new file mode 100644 index 000000000..ca7e64f1c Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-17-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-1.png new file mode 100644 index 000000000..f258bbd43 Binary files /dev/null and 
b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-2.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-2.png new file mode 100644 index 000000000..f258bbd43 Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-18-output-2.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-19-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-19-output-1.png new file mode 100644 index 000000000..e36a6dded Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-19-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-1.png new file mode 100644 index 000000000..7530590cb Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-2.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-2.png new file mode 100644 index 000000000..7530590cb Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-20-output-2.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-4-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-4-output-1.png new file mode 100644 index 000000000..224a8b083 Binary files 
/dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-4-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-5-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-5-output-1.png new file mode 100644 index 000000000..3bc172d76 Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-5-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-6-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-6-output-1.png new file mode 100644 index 000000000..5542b461e Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-6-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-7-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-7-output-1.png new file mode 100644 index 000000000..a5b366fc4 Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-7-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-8-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-8-output-1.png new file mode 100644 index 000000000..ea9a22eae Binary files /dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-8-output-1.png differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-9-output-1.png b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-9-output-1.png new file mode 100644 index 000000000..c60bfd6f6 Binary files 
/dev/null and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-html/cell-9-output-1.png differ diff --git a/cv_regularization/cv_reg.html b/docs/cv_regularization/cv_reg.html similarity index 99% rename from cv_regularization/cv_reg.html rename to docs/cv_regularization/cv_reg.html index 817892826..cb4fb3cf6 100644 --- a/cv_regularization/cv_reg.html +++ b/docs/cv_regularization/cv_reg.html @@ -435,7 +435,7 @@

print(f"Test error: {test_error}")
Training error: 17.85851684101209
-Test error: 23.192405629701074
+Test error: 23.192405629736058
@@ -606,7 +606,7 @@

lasso_model.fit(X_train, Y_train) lasso_model.coef_ -
+
array([-2.54932056e-01, -9.48597165e-04,  8.91976284e-06, -1.22872290e-08])
@@ -621,7 +621,7 @@

X_train.head()
-
+
@@ -681,7 +681,7 @@

pd.DataFrame({"Feature":X_train.columns, "Parameter":lasso_model.coef_})
-
+
@@ -748,8 +748,8 @@

ridge_model.fit(X_train, Y_train) ridge_model.coef_

-
-
array([ 5.89130559e-02, -6.42445915e-03,  4.44468157e-05, -8.83981945e-08])
+
+
array([ 5.89130560e-02, -6.42445916e-03,  4.44468157e-05, -8.83981945e-08])
diff --git a/docs/cv_regularization/images/constrained_gd.png b/docs/cv_regularization/images/constrained_gd.png new file mode 100644 index 000000000..4eda732b7 Binary files /dev/null and b/docs/cv_regularization/images/constrained_gd.png differ diff --git a/docs/cv_regularization/images/cross_validation.png b/docs/cv_regularization/images/cross_validation.png new file mode 100644 index 000000000..9faee18b6 Binary files /dev/null and b/docs/cv_regularization/images/cross_validation.png differ diff --git a/docs/cv_regularization/images/diamond.png b/docs/cv_regularization/images/diamond.png new file mode 100644 index 000000000..cdb03a3b2 Binary files /dev/null and b/docs/cv_regularization/images/diamond.png differ diff --git a/docs/cv_regularization/images/diamondpoint.png b/docs/cv_regularization/images/diamondpoint.png new file mode 100644 index 000000000..2d56ec3f4 Binary files /dev/null and b/docs/cv_regularization/images/diamondpoint.png differ diff --git a/docs/cv_regularization/images/diamondreg.png b/docs/cv_regularization/images/diamondreg.png new file mode 100644 index 000000000..6bd703484 Binary files /dev/null and b/docs/cv_regularization/images/diamondreg.png differ diff --git a/docs/cv_regularization/images/green_constrained_gd_sol.png b/docs/cv_regularization/images/green_constrained_gd_sol.png new file mode 100644 index 000000000..aa481a6f4 Binary files /dev/null and b/docs/cv_regularization/images/green_constrained_gd_sol.png differ diff --git a/docs/cv_regularization/images/hyperparameter_tuning.png b/docs/cv_regularization/images/hyperparameter_tuning.png new file mode 100644 index 000000000..fce75441a Binary files /dev/null and b/docs/cv_regularization/images/hyperparameter_tuning.png differ diff --git a/docs/cv_regularization/images/largerq.png b/docs/cv_regularization/images/largerq.png new file mode 100644 index 000000000..b0d2b7979 Binary files /dev/null and b/docs/cv_regularization/images/largerq.png differ diff --git 
a/docs/cv_regularization/images/model_selection.png b/docs/cv_regularization/images/model_selection.png new file mode 100644 index 000000000..219273867 Binary files /dev/null and b/docs/cv_regularization/images/model_selection.png differ diff --git a/docs/cv_regularization/images/possible_validation_sets.png b/docs/cv_regularization/images/possible_validation_sets.png new file mode 100644 index 000000000..f41f7d364 Binary files /dev/null and b/docs/cv_regularization/images/possible_validation_sets.png differ diff --git a/docs/cv_regularization/images/simple_under_overfit.png b/docs/cv_regularization/images/simple_under_overfit.png new file mode 100644 index 000000000..51bdffdfc Binary files /dev/null and b/docs/cv_regularization/images/simple_under_overfit.png differ diff --git a/docs/cv_regularization/images/summary.png b/docs/cv_regularization/images/summary.png new file mode 100644 index 000000000..59a4ccaf7 Binary files /dev/null and b/docs/cv_regularization/images/summary.png differ diff --git a/docs/cv_regularization/images/train-test-split.png b/docs/cv_regularization/images/train-test-split.png new file mode 100644 index 000000000..6c9bfd0bc Binary files /dev/null and b/docs/cv_regularization/images/train-test-split.png differ diff --git a/docs/cv_regularization/images/training_validation_curve.png b/docs/cv_regularization/images/training_validation_curve.png new file mode 100644 index 000000000..0f6fd9aa6 Binary files /dev/null and b/docs/cv_regularization/images/training_validation_curve.png differ diff --git a/docs/cv_regularization/images/unconstrained.png b/docs/cv_regularization/images/unconstrained.png new file mode 100644 index 000000000..20ad9e443 Binary files /dev/null and b/docs/cv_regularization/images/unconstrained.png differ diff --git a/docs/cv_regularization/images/validation-split.png b/docs/cv_regularization/images/validation-split.png new file mode 100644 index 000000000..5c8aaa3bf Binary files /dev/null and 
b/docs/cv_regularization/images/validation-split.png differ diff --git a/docs/cv_regularization/images/validation_set.png b/docs/cv_regularization/images/validation_set.png new file mode 100644 index 000000000..7d816e7d6 Binary files /dev/null and b/docs/cv_regularization/images/validation_set.png differ diff --git a/docs/cv_regularization/images/verylarge.png b/docs/cv_regularization/images/verylarge.png new file mode 100644 index 000000000..b08a41efe Binary files /dev/null and b/docs/cv_regularization/images/verylarge.png differ diff --git a/docs/data100_logo.png b/docs/data100_logo.png new file mode 100644 index 000000000..b30e64d0e Binary files /dev/null and b/docs/data100_logo.png differ diff --git a/eda/eda.html b/docs/eda/eda.html similarity index 80% rename from eda/eda.html rename to docs/eda/eda.html index 6944abfb7..47fdd83f7 100644 --- a/eda/eda.html +++ b/docs/eda/eda.html @@ -686,7 +686,7 @@
force=False) covid_file # a file path wrapper object
-
Using cached version that was downloaded (UTC): Mon Mar 18 21:13:08 2024
+
Using cached version that was downloaded (UTC): Fri Aug 25 09:57:25 2023
PosixPath('data/confirmed-cases.json')
@@ -718,7 +718,7 @@
!ls -lh {covid_file}
 !wc -l {covid_file}
-
-rw-r--r--  1 Ishani  staff   114K Mar 18 21:13 data/confirmed-cases.json
+
-rw-r--r--  1 lillianweng  staff   114K Aug 25  2023 data/confirmed-cases.json
    1109 data/confirmed-cases.json
@@ -4130,14 +4130,8 @@

sns.displot(co2['Days']);
 plt.title("Distribution of days feature"); # suppresses unneeded plotting output

-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
-

+

In terms of data quality, a handful of months have averages based on measurements taken on fewer than half the days. In addition, there are nearly 200 missing values–that’s about 27% of the data!

@@ -4147,8 +4141,8 @@

Code -
sns.scatterplot(x="Yr", y="Days", data=co2);
-plt.title("Day field by Year"); # the ; suppresses output
+
sns.scatterplot(x="Yr", y="Days", data=co2);
+plt.title("Day field by Year"); # the ; suppresses output

@@ -4172,23 +4166,17 @@

Code -
# Histograms of average CO2 measurements
-sns.displot(co2['Avg']);
+
# Histograms of average CO2 measurements
+sns.displot(co2['Avg']);
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
-

+

The non-missing values are in the 300-400 range (a regular range of CO2 levels).

We also see that there are only a few missing Avg values (<1% of values). Let’s examine all of them:

-
co2[co2["Avg"] < 0]
+
co2[co2["Avg"] < 0]
@@ -4297,8 +4285,8 @@

Code -
sns.lineplot(x='DecDate', y='Avg', data=co2)
-plt.title("CO2 Average By Month");
+
sns.lineplot(x='DecDate', y='Avg', data=co2)
+plt.title("CO2 Average By Month");

@@ -4309,9 +4297,9 @@

-
# 1. Drop missing values
-co2_drop = co2[co2['Avg'] > 0]
-co2_drop.head()
+
# 1. Drop missing values
+co2_drop = co2[co2['Avg'] > 0]
+co2_drop.head()
@@ -4387,9 +4375,9 @@

-
# 2. Replace NaN with -99.99
-co2_NA = co2.replace(-99.99, np.NaN)
-co2_NA.head()
+
# 2. Replace NaN with -99.99
+co2_NA = co2.replace(-99.99, np.NaN)
+co2_NA.head()
@@ -4473,10 +4461,10 @@

-
# 3. Use interpolated column which estimates missing Avg values
-co2_impute = co2.copy()
-co2_impute['Avg'] = co2['Int']
-co2_impute.head()
+
# 3. Use interpolated column which estimates missing Avg values
+co2_impute = co2.copy()
+co2_impute['Avg'] = co2['Int']
+co2_impute.head()
@@ -4556,30 +4544,30 @@

Code -
# results of plotting data in 1958
-
-def line_and_points(data, ax, title):
-    # assumes single year, hence Mo
-    ax.plot('Mo', 'Avg', data=data)
-    ax.scatter('Mo', 'Avg', data=data)
-    ax.set_xlim(2, 13)
-    ax.set_title(title)
-    ax.set_xticks(np.arange(3, 13))
-
-def data_year(data, year):
-    return data[data["Yr"] == 1958]
-    
-# uses matplotlib subplots
-# you may see more next week; focus on output for now
-fig, axes = plt.subplots(ncols = 3, figsize=(12, 4), sharey=True)
-
-year = 1958
-line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
-line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
-line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
-
-fig.suptitle(f"Monthly Averages for {year}")
-plt.tight_layout()
+
# results of plotting data in 1958
+
+def line_and_points(data, ax, title):
+    # assumes single year, hence Mo
+    ax.plot('Mo', 'Avg', data=data)
+    ax.scatter('Mo', 'Avg', data=data)
+    ax.set_xlim(2, 13)
+    ax.set_title(title)
+    ax.set_xticks(np.arange(3, 13))
+
+def data_year(data, year):
+    return data[data["Yr"] == 1958]
+    
+# uses matplotlib subplots
+# you may see more next week; focus on output for now
+fig, axes = plt.subplots(ncols = 3, figsize=(12, 4), sharey=True)
+
+year = 1958
+line_and_points(data_year(co2_drop, year), axes[0], title="1. Drop Missing")
+line_and_points(data_year(co2_NA, year), axes[1], title="2. Missing Set to NaN")
+line_and_points(data_year(co2_impute, year), axes[2], title="3. Missing Interpolated")
+
+fig.suptitle(f"Monthly Averages for {year}")
+plt.tight_layout()

@@ -4595,8 +4583,8 @@

Code -
sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
-plt.title("CO2 Average By Month, Imputed");
+
sns.lineplot(x='DecDate', y='Avg', data=co2_impute)
+plt.title("CO2 Average By Month, Imputed");

@@ -4623,9 +4611,9 @@

Code -
co2_year = co2_impute.groupby('Yr').mean()
-sns.lineplot(x='Yr', y='Avg', data=co2_year)
-plt.title("CO2 Average By Year");
+
co2_year = co2_impute.groupby('Yr').mean()
+sns.lineplot(x='Yr', y='Avg', data=co2_year)
+plt.title("CO2 Average By Year");

@@ -4966,1221 +4954,1221 @@

diff --git a/docs/eda/eda_files/figure-html/cell-62-output-1.png b/docs/eda/eda_files/figure-html/cell-62-output-1.png new file mode 100644 index 000000000..f392d5f92 Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-62-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-67-output-1.png b/docs/eda/eda_files/figure-html/cell-67-output-1.png new file mode 100644 index 000000000..be96b8c94 Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-67-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-68-output-1.png b/docs/eda/eda_files/figure-html/cell-68-output-1.png new file mode 100644 index 000000000..ffd29ff8f Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-68-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-69-output-1.png b/docs/eda/eda_files/figure-html/cell-69-output-1.png new file mode 100644 index 000000000..290889288 Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-69-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-71-output-1.png b/docs/eda/eda_files/figure-html/cell-71-output-1.png new file mode 100644 index 000000000..49ef3d6a6 Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-71-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-75-output-1.png b/docs/eda/eda_files/figure-html/cell-75-output-1.png new file mode 100644 index 000000000..15a5fe82d Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-75-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-76-output-1.png b/docs/eda/eda_files/figure-html/cell-76-output-1.png new file mode 100644 index 000000000..40b1fc714 Binary files /dev/null and b/docs/eda/eda_files/figure-html/cell-76-output-1.png differ diff --git a/docs/eda/eda_files/figure-html/cell-77-output-1.png b/docs/eda/eda_files/figure-html/cell-77-output-1.png new file mode 100644 index 000000000..99b6c2d1e Binary files /dev/null and 
b/docs/eda/eda_files/figure-html/cell-77-output-1.png differ diff --git a/docs/eda/images/variable.png b/docs/eda/images/variable.png new file mode 100644 index 000000000..3cd730a94 Binary files /dev/null and b/docs/eda/images/variable.png differ diff --git a/feature_engineering/feature_engineering.html b/docs/feature_engineering/feature_engineering.html similarity index 99% rename from feature_engineering/feature_engineering.html rename to docs/feature_engineering/feature_engineering.html index ddd15067c..0dce32e25 100644 --- a/feature_engineering/feature_engineering.html +++ b/docs/feature_engineering/feature_engineering.html @@ -752,7 +752,7 @@

print(f"MSE of model with (hp^2) feature: {np.mean((Y-hp2_model_predictions)**2)}")

-
MSE of model with (hp^2) feature: 18.984768907617223
+
MSE of model with (hp^2) feature: 18.984768907617216

diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png new file mode 100644 index 000000000..f83966673 Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-16-output-2.png differ diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png new file mode 100644 index 000000000..ceecd30f2 Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-17-output-2.png differ diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png new file mode 100644 index 000000000..802e311e2 Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-5-output-2.png differ diff --git a/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png b/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png new file mode 100644 index 000000000..f57138e12 Binary files /dev/null and b/docs/feature_engineering/feature_engineering_files/figure-html/cell-6-output-2.png differ diff --git a/docs/feature_engineering/images/bias.png b/docs/feature_engineering/images/bias.png new file mode 100644 index 000000000..e6455ca22 Binary files /dev/null and b/docs/feature_engineering/images/bias.png differ diff --git a/docs/feature_engineering/images/bvt.png b/docs/feature_engineering/images/bvt.png new file mode 100644 index 000000000..7baffea82 Binary files /dev/null and b/docs/feature_engineering/images/bvt.png differ diff --git a/docs/feature_engineering/images/complex.png b/docs/feature_engineering/images/complex.png new file mode 100644 index 000000000..61769f1a3 Binary 
files /dev/null and b/docs/feature_engineering/images/complex.png differ diff --git a/docs/feature_engineering/images/complexity_grad_descent.png b/docs/feature_engineering/images/complexity_grad_descent.png new file mode 100644 index 000000000..8a48dbbe4 Binary files /dev/null and b/docs/feature_engineering/images/complexity_grad_descent.png differ diff --git a/docs/feature_engineering/images/complexity_normal_solution.png b/docs/feature_engineering/images/complexity_normal_solution.png new file mode 100644 index 000000000..c41ad6a7a Binary files /dev/null and b/docs/feature_engineering/images/complexity_normal_solution.png differ diff --git a/docs/feature_engineering/images/degree_comparison.png b/docs/feature_engineering/images/degree_comparison.png new file mode 100644 index 000000000..9bb1992e7 Binary files /dev/null and b/docs/feature_engineering/images/degree_comparison.png differ diff --git a/docs/feature_engineering/images/degree_comparison2.png b/docs/feature_engineering/images/degree_comparison2.png new file mode 100644 index 000000000..95ee200a0 Binary files /dev/null and b/docs/feature_engineering/images/degree_comparison2.png differ diff --git a/docs/feature_engineering/images/gd.png b/docs/feature_engineering/images/gd.png new file mode 100644 index 000000000..6ba0c3376 Binary files /dev/null and b/docs/feature_engineering/images/gd.png differ diff --git a/docs/feature_engineering/images/ohe.png b/docs/feature_engineering/images/ohe.png new file mode 100644 index 000000000..c5f26296c Binary files /dev/null and b/docs/feature_engineering/images/ohe.png differ diff --git a/docs/feature_engineering/images/ohemodel.png b/docs/feature_engineering/images/ohemodel.png new file mode 100644 index 000000000..06dddaea7 Binary files /dev/null and b/docs/feature_engineering/images/ohemodel.png differ diff --git a/docs/feature_engineering/images/perfect_poly_fits.png b/docs/feature_engineering/images/perfect_poly_fits.png new file mode 100644 index 
000000000..86943ecfc Binary files /dev/null and b/docs/feature_engineering/images/perfect_poly_fits.png differ diff --git a/docs/feature_engineering/images/phi.png b/docs/feature_engineering/images/phi.png new file mode 100644 index 000000000..4c0b04e91 Binary files /dev/null and b/docs/feature_engineering/images/phi.png differ diff --git a/docs/feature_engineering/images/pytorchsgd.png b/docs/feature_engineering/images/pytorchsgd.png new file mode 100644 index 000000000..85b07dbcd Binary files /dev/null and b/docs/feature_engineering/images/pytorchsgd.png differ diff --git a/docs/feature_engineering/images/remove.png b/docs/feature_engineering/images/remove.png new file mode 100644 index 000000000..bd09ddcf1 Binary files /dev/null and b/docs/feature_engineering/images/remove.png differ diff --git a/docs/feature_engineering/images/resamples.png b/docs/feature_engineering/images/resamples.png new file mode 100644 index 000000000..28f904ab1 Binary files /dev/null and b/docs/feature_engineering/images/resamples.png differ diff --git a/docs/feature_engineering/images/sgd.png b/docs/feature_engineering/images/sgd.png new file mode 100644 index 000000000..ee579a100 Binary files /dev/null and b/docs/feature_engineering/images/sgd.png differ diff --git a/docs/feature_engineering/images/train_error.png b/docs/feature_engineering/images/train_error.png new file mode 100644 index 000000000..a2993b42b Binary files /dev/null and b/docs/feature_engineering/images/train_error.png differ diff --git a/gradient_descent/gradient_descent.html b/docs/gradient_descent/gradient_descent.html similarity index 87% rename from gradient_descent/gradient_descent.html rename to docs/gradient_descent/gradient_descent.html index 1589ea599..6c0d68abc 100644 --- a/gradient_descent/gradient_descent.html +++ b/docs/gradient_descent/gradient_descent.html @@ -106,7 +106,7 @@ require.undef("plotly"); requirejs.config({ paths: { - 'plotly': ['https://cdn.plot.ly/plotly-2.25.2.min'] + 'plotly': 
['https://cdn.plot.ly/plotly-2.12.1.min'] } }); require(['plotly'], function(Plotly) { @@ -591,7 +591,7 @@

my_model.fit(X, Y)

-
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
+
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Notice that we use double brackets to extract this column. Why double brackets instead of just single brackets? The .fit method, by default, expects to receive 2-dimensional data – some kind of data that includes both rows and columns. Writing penguins["flipper_length_mm"] would return a 1D Series, causing sklearn to error. We avoid this by writing penguins[["flipper_length_mm"]] to produce a 2D DataFrame.

@@ -642,7 +642,7 @@

print(f"The RMSE of the model is {np.sqrt(np.mean((Y-Y_hat_two_features)**2))}")

-
The RMSE of the model is 0.9881331104079044
+
The RMSE of the model is 0.9881331104079045

We can also see that we obtain the same predictions using sklearn as we did when applying the ordinary least squares formula before!

@@ -758,9 +758,9 @@

-
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + +
+ + + +
+ +
+ + + + +
+ + + + +
+ + +
+ +
+
+
+ +
+
+Learning Outcomes +
+
+
+
+
+
    +
  • Perform aggregations using GROUP BY
  • +
  • Introduce the ability to filter groups
  • +
  • Perform data cleaning and text manipulation in SQL
  • +
  • Join data across tables
  • +
+
+
+
+

In this lecture, we’ll continue our work from last time to introduce some advanced SQL syntax.

+

First, let’s load in the basic_examples.db database.

+
+
+Code +
# Load the SQL Alchemy Python library and DuckDB
+import sqlalchemy
+import duckdb
+
+
+
+
# Load %%sql cell magic
+%load_ext sql
+
+
+
# Connect to the database
+%sql duckdb:///data/basic_examples.db --alias basic
+
+
+

21.1 Aggregating with GROUP BY

+

At this point, we’ve seen that SQL offers much of the same functionality that was given to us by pandas. We can extract data from a table, filter it, and reorder it to suit our needs.

+

In pandas, much of our analysis work relied heavily on being able to use .groupby() to aggregate across the rows of our dataset. SQL’s answer to this task is the (very conveniently named) GROUP BY clause. While the outputs of GROUP BY are similar to those of .groupby() —— in both cases, we obtain an output table where some column has been used for grouping —— the syntax and logic used to group data in SQL are fairly different to the pandas implementation.

+

To illustrate GROUP BY, we will consider the Dish table from our database.

+
+
%%sql
+SELECT * 
+FROM Dish;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + + +
nametypecost
+
+
+

Notice that there are multiple dishes of the same type. What if we wanted to find the total costs of dishes of a certain type? To accomplish this, we would write the following code.

+
+
%%sql
+SELECT type, SUM(cost)
+FROM Dish
+GROUP BY type;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + +
typesum("cost")
+
+
+

What is going on here? The statement GROUP BY type tells SQL to group the data based on the value contained in the type column (whether a record is an appetizer, entree, or dessert). SUM(cost) sums up the costs of dishes in each type and displays the result in the output table.

+

You may be wondering: why does SUM(cost) come before the command to GROUP BY type? Don’t we need to form groups before we can sum the costs in each? Remember that SQL is a declarative programming language —— a SQL programmer simply states what end result they would like to see, and leaves the task of figuring out how to obtain this result to SQL itself. This means that SQL queries sometimes don’t follow what a reader sees as a “logical” sequence of thought. Instead, SQL requires that we follow its set order of operations when constructing queries. So long as we follow this order, SQL will handle the underlying logic.

+

In practical terms: our goal with this query was to output the total costs of each type. To communicate this to SQL, we say that we want to SELECT the SUMmed cost values for each type group.

+

There are many aggregation functions that can be used to aggregate the data contained in each group. Some common examples are:

+
    +
  • COUNT: count the number of rows associated with each group
  • +
  • MIN: find the minimum value of each group
  • +
  • MAX: find the maximum value of each group
  • +
  • SUM: sum across all records in each group
  • +
  • AVG: find the average value of each group
  • +
+

We can easily compute multiple aggregations all at once (a task that was very tricky in pandas).

+
+
%%sql
+SELECT type, SUM(cost), MIN(cost), MAX(name)
+FROM Dish
+GROUP BY type;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + + + +
typesum("cost")min("cost")max("name")
+
+
+

To count the number of rows associated with each group, we use the COUNT keyword. Calling COUNT(*) will compute the total number of rows in each group, including rows with null values. Its pandas equivalent is .groupby().size().

+

Recall the Dragon table from the previous lecture:

+
+
%%sql
+SELECT * FROM Dragon;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + + +
nameyearcute
+
+
+

Notice that COUNT(*) and COUNT(cute) result in different outputs.

+
+
%%sql
+SELECT year, COUNT(*)
+FROM Dragon
+GROUP BY year;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + +
yearcount_star()
+
+
+
+
%%sql
+SELECT year, COUNT(cute)
+FROM Dragon
+GROUP BY year;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + +
yearcount(cute)
+
+
+

With this definition of GROUP BY in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.

+
SELECT <column expression list>
+FROM <table>
+[WHERE <predicate>]
+[GROUP BY <column list>]
+[ORDER BY <column list>]
+[LIMIT <number of rows>]
+[OFFSET <number of rows>];
+

Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).

+
+
+

21.2 Filtering Groups

+

Now, what if we only want groups that meet a certain condition? HAVING filters groups by applying some condition across all rows in each group. We interpret it as a way to keep only the groups HAVING some condition. Note the difference between WHERE and HAVING: we use WHERE to filter rows, whereas we use HAVING to filter groups. WHERE precedes HAVING in terms of how SQL executes a query.

+

Let’s take a look at the Dish table to see how we can use HAVING. Say we want to group dishes with a cost greater than 4 by type and only keep groups where the max cost is less than 10.

+
+
%%sql
+SELECT type, COUNT(*)
+FROM Dish
+WHERE cost > 4
+GROUP BY type
+HAVING MAX(cost) <  10;
+
+
 * duckdb:///data/basic_examples.db
+Done.
+
+
+ + + + + + + + + +
typecount_star()
+
+
+

Here, we first use WHERE to filter for rows with a cost greater than 4. We then group our values by type before applying the HAVING operator. With HAVING, we can filter our groups based on whether the max cost is less than 10.

+
+
+

21.3 Summary: SQL

+

With this definition of GROUP BY and HAVING in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.

+
SELECT <column expression list>
+FROM <table>
+[WHERE <predicate>]
+[GROUP BY <column list>]
+[HAVING <predicate>]
+[ORDER BY <column list>]
+[LIMIT <number of rows>]
+[OFFSET <number of rows>];
+

Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).

+
+
+

21.4 EDA in SQL

+

In the last lecture, we mostly worked under the assumption that our data had already been cleaned. However, as we saw in our first pass through the data science lifecycle, we’re very unlikely to be given data that is free of formatting issues. With this in mind, we’ll want to learn how to clean and transform data in SQL.

+

Our typical workflow when working with “big data” is:

+
    +
  1. Use SQL to query data from a database
  2. +
  3. Use Python (with pandas) to analyze this data in detail
  4. +
+

We can, however, still perform simple data cleaning and re-structuring using SQL directly. To do so, we’ll use the Title table from the imdb_duck database, which contains information about movies and actors.

+

Let’s load in the imdb_duck database.

+
+
import os
+if os.path.exists("/home/jovyan/shared/sql/imdb_duck.db"):
+    imdbpath = "duckdb:////home/jovyan/shared/sql/imdb_duck.db"
+elif os.path.exists("data/imdb_duck.db"):
+    imdbpath =  "duckdb:///data/imdb_duck.db"
+else:
+    import gdown
+    url = 'https://drive.google.com/uc?id=10tKOHGLt9QoOgq5Ii-FhxpB9lDSQgl1O'
+    output_path = 'data/imdb_duck.db'
+    gdown.download(url, output_path, quiet=False)
+    imdbpath = "duckdb:///data/imdb_duck.db"
+print(imdbpath)
+
+
duckdb:///data/imdb_duck.db
+
+
+
+
from sqlalchemy import create_engine
+imdb_engine = create_engine(imdbpath, connect_args={'read_only': True})
+%sql imdb_engine --alias imdb
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb_engine"
+[SQL: imdb_engine]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+

Since we’ll be working with the Title table, let’s take a quick look at what it contains.

+
+
%%sql imdb 
+    
+SELECT *
+FROM Title
+WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
+LIMIT 10;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb"
+[SQL: imdb
+    
+SELECT *
+FROM Title
+WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+

21.4.1 Matching Text using LIKE

+

One common task we encountered in our first look at EDA was needing to match string data. For example, we might want to remove entries beginning with the same prefix as part of the data cleaning process.

+

In SQL, we use the LIKE operator to (you guessed it) look for strings that are like a given string pattern.

+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+             ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace']
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+

What if we wanted to find all Star Wars movies? % is the wildcard operator; it means “look for any character, any number of times”. This makes it helpful for identifying strings that are similar to our desired pattern, even when we don’t know the full text of what we aim to extract.

+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE '%Star Wars%'
+LIMIT 10;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+             ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle LIKE '%Star Wars%'
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+

Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for this. Note that here, we have to use the SIMILAR TO operator rather than LIKE.

+
+
%%sql
+SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle SIMILAR TO '.*Star Wars*.'
+LIMIT 10;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+             ^
+[SQL: SELECT titleType, primaryTitle
+FROM Title
+WHERE primaryTitle SIMILAR TO '.*Star Wars*.'
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+

21.4.2 CASTing Data Types

+

A common data cleaning task is converting data to the correct variable type. The CAST keyword is used to generate a new output column. Each entry in this output column is the result of converting the data in an existing column to a new data type. For example, we may wish to convert numeric data stored as a string to an integer.

+
+
%%sql
+SELECT primaryTitle, CAST(runtimeMinutes AS INT)
+FROM Title;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title;
+             ^
+[SQL: SELECT primaryTitle, CAST(runtimeMinutes AS INT)
+FROM Title;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+

We use CAST when SELECTing columns for our output table. In the example above, we want to SELECT the columns of integer year and runtime data that is created by the CAST.

+

SQL will automatically name a new column according to the command used to SELECT it, which can lead to unwieldy column names. We can rename the CASTed column using the AS keyword.

+
+
%%sql
+SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
+FROM Title
+LIMIT 5;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title
+             ^
+[SQL: SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
+FROM Title
+LIMIT 5;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+

21.4.3 Using Conditional Statements with CASE

+

When working with pandas, we often ran into situations where we wanted to generate new columns using some form of conditional statement. For example, say we wanted to describe a film title as “old,” “mid-aged,” or “new,” depending on the year of its release.

+

In SQL, conditional operations are performed using a CASE clause. Conceptually, CASE behaves much like the CAST operation: it creates a new column that we can then SELECT to appear in the output. The syntax for a CASE clause is as follows:

+
CASE WHEN <condition> THEN <value>
+     WHEN <other condition> THEN <other value>
+     ...
+     ELSE <yet another value>
+     END
+

Scanning through the skeleton code above, you can see that the logic is similar to that of an if statement in Python. The conditional statement is first opened by calling CASE. Each new condition is specified by WHEN, with THEN indicating what value should be filled if the condition is met. ELSE specifies the value that should be filled if no other conditions are met. Lastly, END indicates the end of the conditional statement; once END has been called, SQL will continue evaluating the query as usual.

+

Let’s see this in action. In the example below, we give the new column created by the CASE statement the name movie_age.

+
+
%%sql
+/* If a movie was filmed before 1950, it is "old"
+Otherwise, if a movie was filmed before 2000, it is "mid-aged"
+Else, a movie is "new" */
+
+SELECT titleType, startYear,
+CASE WHEN startYear < 1950 THEN 'old'
+     WHEN startYear < 2000 THEN 'mid-aged'
+     ELSE 'new'
+     END AS movie_age
+FROM Title;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 10: FROM Title;
+              ^
+[SQL: /* If a movie was filmed before 1950, it is "old"
+Otherwise, if a movie was filmed before 2000, it is "mid-aged"
+Else, a movie is "new" */
+
+SELECT titleType, startYear,
+CASE WHEN startYear < 1950 THEN 'old'
+     WHEN startYear < 2000 THEN 'mid-aged'
+     ELSE 'new'
+     END AS movie_age
+FROM Title;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+
+

21.5 JOINing Tables

+

At this point, we’re well-versed in using SQL as a tool to clean, manipulate, and transform data in a table. Notice that this sentence referred to one table, specifically. What happens if the data we need is distributed across multiple tables? This is an important consideration when using SQL —— recall that we first introduced SQL as a language to query from databases. Databases often store data in a multidimensional structure. In other words, information is stored across several tables, with each table containing a small subset of all the data housed by the database.

+

A common way of organizing a database is by using a star schema. A star schema is composed of two types of tables. A fact table is the central table of the database —— it contains the information needed to link entries across several dimension tables, which contain more detailed information about the data.

+

Say we were working with a database about boba offerings in Berkeley. The dimension tables of the database might contain information about tea varieties and boba toppings. The fact table would be used to link this information across the various dimension tables.

+
+

multidimensional

+
+

If we explicitly mark the relationships between tables, we start to see the star-like structure of the star schema.

+
+

star

+
+

To join data across multiple tables, we’ll use the (creatively named) JOIN keyword. We’ll make things easier for now by first considering the simpler cats dataset, which consists of the tables s and t.

+
+

cats

+
+

To perform a join, we amend the FROM clause. You can think of this as saying, “SELECT my data FROM tables that have been JOINed together.”

+

Remember: SQL does not consider newlines or whitespace when interpreting queries. The indentation given in the example below is to help improve readability. If you wish, you can write code that does not follow this formatting.

+
SELECT <column list>
+FROM table_1 
+    JOIN table_2 
+    ON key_1 = key_2;
+

We also need to specify what column from each table should be used to determine matching entries. By defining these keys, we provide SQL with the information it needs to pair rows of data together.

+

The most commonly used type of SQL JOIN is the inner join. It turns out you’re already familiar with what an inner join does, and how it works – this is the type of join we’ve been using in pandas all along! In an inner join, we combine every row in our first table with its matching entry in the second table. If a row from either table does not have a match in the other table, it is omitted from the output.

+
+

inner

+
+

In a cross join, all possible combinations of rows appear in the output table, regardless of whether or not rows share a matching key. Because all rows are joined, even if there is no matching key, it is not necessary to specify what keys to consider in an ON statement. A cross join is also known as a cartesian product.

+
+

cross

+
+

Conceptually, we can interpret an inner join as a cross join, followed by removing all rows that do not share a matching key. Notice that the output of the inner join above contains all rows of the cross join example that contain a single color across the entire row.

+

In a left outer join, all rows in the left table are kept in the output table. If a row in the right table shares a match with the left table, this row will be kept; otherwise, the rows in the right table are omitted from the output. Any missing values are filled in with NULL.

+
+

left

+
+

A right outer join keeps all rows in the right table. Rows in the left table are only kept if they share a match in the right table. Again, any missing values are filled in with NULL.

+
+

right

+
+

In a full outer join, all rows that have a match between the two tables are joined together. If a row has no match in the second table, then the values of the columns for that second table are filled with NULL. In other words, a full outer join performs an inner join while still keeping rows that have no match in the other table. This is best understood visually:

+
+

full

+
+

We have kept the same output achieved using an inner join, with the addition of partially null rows for entries in s and t that had no match in the second table.

+
+

21.5.1 Aliasing in JOINs

+

When joining tables, we often create aliases for table names (similarly to what we did with column names in the last lecture). We do this as it is typically easier to refer to aliases, especially when we are working with long table names. We can even reference columns using aliased table names!

+

Let’s say we want to determine the average rating of various movies. We’ll need to JOIN the Title and Rating tables and can create aliases for both tables.

+
+
%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title AS T INNER JOIN Rating AS R
+ON T.tconst = R.tconst;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title AS T INNER JOIN Rating AS R
+             ^
+[SQL: SELECT primaryTitle, averageRating
+FROM Title AS T INNER JOIN Rating AS R
+ON T.tconst = R.tconst;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+

Note that the AS is actually optional! We can create aliases for our tables even without it, but we usually include it for clarity.

+
+
%%sql
+
+SELECT primaryTitle, averageRating
+FROM Title T INNER JOIN Rating R
+ON T.tconst = R.tconst;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 2: FROM Title T INNER JOIN Rating R
+             ^
+[SQL: SELECT primaryTitle, averageRating
+FROM Title T INNER JOIN Rating R
+ON T.tconst = R.tconst;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+
+
+

21.5.2 Common Table Expressions

+

For more sophisticated data problems, the queries can become very complex. Common table expressions (CTEs) allow us to break down these complex queries into more manageable parts. To do so, we create temporary tables corresponding to different aspects of the problem and then reference them in the final query:

+
WITH 
+table_name1 AS ( 
+    SELECT ...
+),
+table_name2 AS ( 
+    SELECT ...
+)
+SELECT ... 
+FROM 
+table_name1, 
+table_name2, ...
+

Let’s say we want to identify the top 10 action movies that are highly rated (with an average rating greater than 7) and popular (having more than 5000 votes), along with the primary actors who are the most popular. We can use CTEs to break this query down into separate problems. Initially, we can filter to find good action movies and prolific actors separately. This way, in our final join, we only need to change the order.

+
+
%%sql
+WITH 
+good_action_movies AS (
+    SELECT *
+    FROM Title T JOIN Rating R ON T.tconst = R.tconst  
+    WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
+),
+prolific_actors AS (
+    SELECT N.nconst, primaryName, COUNT(*) as numRoles
+    FROM Name N JOIN Principal P ON N.nconst = P.nconst
+    WHERE category = 'actor'
+    GROUP BY N.nconst, primaryName
+)
+SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
+FROM good_action_movies m, prolific_actors a, principal p
+WHERE p.tconst = m.tconst AND p.nconst = a.nconst
+ORDER BY rating DESC, numRoles DESC
+LIMIT 10;
+
+
 * duckdb:///data/basic_examples.db
+(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist!
+Did you mean "temp.information_schema.tables"?
+LINE 4:     F...
+                 ^
+[SQL: WITH 
+good_action_movies AS (
+    SELECT *
+    FROM Title T JOIN Rating R ON T.tconst = R.tconst  
+    WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
+),
+prolific_actors AS (
+    SELECT N.nconst, primaryName, COUNT(*) as numRoles
+    FROM Name N JOIN Principal P ON N.nconst = P.nconst
+    WHERE category = 'actor'
+    GROUP BY N.nconst, primaryName
+)
+SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
+FROM good_action_movies m, prolific_actors a, principal p
+WHERE p.tconst = m.tconst AND p.nconst = a.nconst
+ORDER BY rating DESC, numRoles DESC
+LIMIT 10;]
+(Background on this error at: https://sqlalche.me/e/20/f405)
+
+
+ + + + +
+
+ +
+ + +
+ + + + \ No newline at end of file diff --git a/docs/visualization_1/images/bad_distro.png b/docs/visualization_1/images/bad_distro.png new file mode 100644 index 000000000..da18378e1 Binary files /dev/null and b/docs/visualization_1/images/bad_distro.png differ diff --git a/docs/visualization_1/images/box_plot_diagram.png b/docs/visualization_1/images/box_plot_diagram.png new file mode 100644 index 000000000..1da125972 Binary files /dev/null and b/docs/visualization_1/images/box_plot_diagram.png differ diff --git a/docs/visualization_1/images/good_distro.png b/docs/visualization_1/images/good_distro.png new file mode 100644 index 000000000..ee7be0663 Binary files /dev/null and b/docs/visualization_1/images/good_distro.png differ diff --git a/docs/visualization_1/images/histogram_viz.png b/docs/visualization_1/images/histogram_viz.png new file mode 100644 index 000000000..4a50ec4b9 Binary files /dev/null and b/docs/visualization_1/images/histogram_viz.png differ diff --git a/docs/visualization_1/images/line_chart_viz.png b/docs/visualization_1/images/line_chart_viz.png new file mode 100644 index 000000000..bbec9dc15 Binary files /dev/null and b/docs/visualization_1/images/line_chart_viz.png differ diff --git a/docs/visualization_1/images/scatter.png b/docs/visualization_1/images/scatter.png new file mode 100644 index 000000000..3ee8bb834 Binary files /dev/null and b/docs/visualization_1/images/scatter.png differ diff --git a/docs/visualization_1/images/variable_types_vis_1.png b/docs/visualization_1/images/variable_types_vis_1.png new file mode 100644 index 000000000..0409b3cf1 Binary files /dev/null and b/docs/visualization_1/images/variable_types_vis_1.png differ diff --git a/visualization_1/visualization_1.html b/docs/visualization_1/visualization_1.html similarity index 99% rename from visualization_1/visualization_1.html rename to docs/visualization_1/visualization_1.html index 2ffa4ddcc..0765f5540 100644 --- a/visualization_1/visualization_1.html +++ 
b/docs/visualization_1/visualization_1.html @@ -854,7 +854,7 @@

sns.violinplot(data=wb, y="Gross national income per capita, Atlas method: $: 2016");
-

+

A quartile represents a 25% portion of the data. We say that:

diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-10-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-10-output-1.png new file mode 100644 index 000000000..322782d44 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-10-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-11-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-11-output-1.png new file mode 100644 index 000000000..bdab34fba Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-11-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-12-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-12-output-1.png new file mode 100644 index 000000000..c318462e8 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-12-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-13-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-13-output-1.png new file mode 100644 index 000000000..98c2427fb Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-13-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-14-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-14-output-1.png new file mode 100644 index 000000000..f67232c9e Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-14-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-15-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-15-output-1.png new file mode 100644 index 000000000..bc418fbf8 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-15-output-1.png differ diff --git 
a/docs/visualization_1/visualization_1_files/figure-html/cell-17-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-17-output-1.png new file mode 100644 index 000000000..b23e61eea Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-17-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-18-output-2.png b/docs/visualization_1/visualization_1_files/figure-html/cell-18-output-2.png new file mode 100644 index 000000000..dd3a63d27 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-18-output-2.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-19-output-2.png b/docs/visualization_1/visualization_1_files/figure-html/cell-19-output-2.png new file mode 100644 index 000000000..5db38e270 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-19-output-2.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-20-output-2.png b/docs/visualization_1/visualization_1_files/figure-html/cell-20-output-2.png new file mode 100644 index 000000000..b0da5ec46 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-20-output-2.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-21-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-21-output-1.png new file mode 100644 index 000000000..e8d03b85b Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-21-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-22-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-22-output-1.png new file mode 100644 index 000000000..8d349afdb Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-22-output-1.png differ diff --git 
a/docs/visualization_1/visualization_1_files/figure-html/cell-23-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-23-output-1.png new file mode 100644 index 000000000..3ab690cdd Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-23-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-25-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-25-output-1.png new file mode 100644 index 000000000..610626314 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-25-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-26-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-26-output-1.png new file mode 100644 index 000000000..3c68e6497 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-26-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-27-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-27-output-1.png new file mode 100644 index 000000000..185c1b768 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-27-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-28-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-28-output-1.png new file mode 100644 index 000000000..ff012f3a9 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-28-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-29-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-29-output-1.png new file mode 100644 index 000000000..06af8ea74 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-29-output-1.png differ diff --git 
a/docs/visualization_1/visualization_1_files/figure-html/cell-3-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-3-output-1.png new file mode 100644 index 000000000..aff0c869e Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-3-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-30-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-30-output-1.png new file mode 100644 index 000000000..9f71a6727 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-30-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-31-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-31-output-1.png new file mode 100644 index 000000000..827062832 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-31-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-32-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-32-output-1.png new file mode 100644 index 000000000..5a833e4e2 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-32-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-4-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-4-output-1.png new file mode 100644 index 000000000..2130b77e2 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-4-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-5-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-5-output-1.png new file mode 100644 index 000000000..93e34b23a Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-5-output-1.png differ diff --git 
a/docs/visualization_1/visualization_1_files/figure-html/cell-7-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-7-output-1.png new file mode 100644 index 000000000..2e52c1f98 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-7-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-8-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-8-output-1.png new file mode 100644 index 000000000..2be9dc6eb Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-8-output-1.png differ diff --git a/docs/visualization_1/visualization_1_files/figure-html/cell-9-output-1.png b/docs/visualization_1/visualization_1_files/figure-html/cell-9-output-1.png new file mode 100644 index 000000000..8ef478384 Binary files /dev/null and b/docs/visualization_1/visualization_1_files/figure-html/cell-9-output-1.png differ diff --git a/docs/visualization_2/images/boxcar_kernel.png b/docs/visualization_2/images/boxcar_kernel.png new file mode 100644 index 000000000..8d652b1e6 Binary files /dev/null and b/docs/visualization_2/images/boxcar_kernel.png differ diff --git a/docs/visualization_2/images/bulge.png b/docs/visualization_2/images/bulge.png new file mode 100644 index 000000000..304f40f14 Binary files /dev/null and b/docs/visualization_2/images/bulge.png differ diff --git a/docs/visualization_2/images/gaussian_0.1.png b/docs/visualization_2/images/gaussian_0.1.png new file mode 100644 index 000000000..5a71d3cc5 Binary files /dev/null and b/docs/visualization_2/images/gaussian_0.1.png differ diff --git a/docs/visualization_2/images/gaussian_1.png b/docs/visualization_2/images/gaussian_1.png new file mode 100644 index 000000000..e51846be2 Binary files /dev/null and b/docs/visualization_2/images/gaussian_1.png differ diff --git a/docs/visualization_2/images/gaussian_10.png b/docs/visualization_2/images/gaussian_10.png new file mode 100644 
index 000000000..45d1974d3 Binary files /dev/null and b/docs/visualization_2/images/gaussian_10.png differ diff --git a/docs/visualization_2/images/gaussian_2.png b/docs/visualization_2/images/gaussian_2.png new file mode 100644 index 000000000..6357afff5 Binary files /dev/null and b/docs/visualization_2/images/gaussian_2.png differ diff --git a/docs/visualization_2/images/gaussian_kernel.png b/docs/visualization_2/images/gaussian_kernel.png new file mode 100644 index 000000000..8be7f2dcd Binary files /dev/null and b/docs/visualization_2/images/gaussian_kernel.png differ diff --git a/docs/visualization_2/images/good_viz_scale_1.png b/docs/visualization_2/images/good_viz_scale_1.png new file mode 100644 index 000000000..4576b61e1 Binary files /dev/null and b/docs/visualization_2/images/good_viz_scale_1.png differ diff --git a/docs/visualization_2/images/good_viz_scale_2.png b/docs/visualization_2/images/good_viz_scale_2.png new file mode 100644 index 000000000..ccbda9388 Binary files /dev/null and b/docs/visualization_2/images/good_viz_scale_2.png differ diff --git a/docs/visualization_2/images/horizontal.png b/docs/visualization_2/images/horizontal.png new file mode 100644 index 000000000..afcfa4856 Binary files /dev/null and b/docs/visualization_2/images/horizontal.png differ diff --git a/docs/visualization_2/images/jet_3_images.png b/docs/visualization_2/images/jet_3_images.png new file mode 100644 index 000000000..1067c77c7 Binary files /dev/null and b/docs/visualization_2/images/jet_3_images.png differ diff --git a/docs/visualization_2/images/jet_colormap.png b/docs/visualization_2/images/jet_colormap.png new file mode 100644 index 000000000..93d07c106 Binary files /dev/null and b/docs/visualization_2/images/jet_colormap.png differ diff --git a/docs/visualization_2/images/jet_four_by_four.png b/docs/visualization_2/images/jet_four_by_four.png new file mode 100644 index 000000000..a46062b04 Binary files /dev/null and 
b/docs/visualization_2/images/jet_four_by_four.png differ diff --git a/docs/visualization_2/images/jet_perceptually_uniform.png b/docs/visualization_2/images/jet_perceptually_uniform.png new file mode 100644 index 000000000..b0490ed8f Binary files /dev/null and b/docs/visualization_2/images/jet_perceptually_uniform.png differ diff --git a/docs/visualization_2/images/kde_function.png b/docs/visualization_2/images/kde_function.png new file mode 100644 index 000000000..392f8656a Binary files /dev/null and b/docs/visualization_2/images/kde_function.png differ diff --git a/docs/visualization_2/images/linearize.png b/docs/visualization_2/images/linearize.png new file mode 100644 index 000000000..14eec3a92 Binary files /dev/null and b/docs/visualization_2/images/linearize.png differ diff --git a/docs/visualization_2/images/male_female_earnings_barplot.png b/docs/visualization_2/images/male_female_earnings_barplot.png new file mode 100644 index 000000000..425ceb383 Binary files /dev/null and b/docs/visualization_2/images/male_female_earnings_barplot.png differ diff --git a/docs/visualization_2/images/male_female_earnings_scatterplot.png b/docs/visualization_2/images/male_female_earnings_scatterplot.png new file mode 100644 index 000000000..827631a08 Binary files /dev/null and b/docs/visualization_2/images/male_female_earnings_scatterplot.png differ diff --git a/docs/visualization_2/images/markings_viz.png b/docs/visualization_2/images/markings_viz.png new file mode 100644 index 000000000..a68e77643 Binary files /dev/null and b/docs/visualization_2/images/markings_viz.png differ diff --git a/docs/visualization_2/images/mutli_dim_encodings.png b/docs/visualization_2/images/mutli_dim_encodings.png new file mode 100644 index 000000000..67ede5ee6 Binary files /dev/null and b/docs/visualization_2/images/mutli_dim_encodings.png differ diff --git a/docs/visualization_2/images/revealed_viz.png b/docs/visualization_2/images/revealed_viz.png new file mode 100644 index 
000000000..a5cbf2d83 Binary files /dev/null and b/docs/visualization_2/images/revealed_viz.png differ diff --git a/docs/visualization_2/images/rugplot_encoding.png b/docs/visualization_2/images/rugplot_encoding.png new file mode 100644 index 000000000..e568644eb Binary files /dev/null and b/docs/visualization_2/images/rugplot_encoding.png differ diff --git a/docs/visualization_2/images/small_multiples.png b/docs/visualization_2/images/small_multiples.png new file mode 100644 index 000000000..d624de378 Binary files /dev/null and b/docs/visualization_2/images/small_multiples.png differ diff --git a/docs/visualization_2/images/tukey_mosteller.png b/docs/visualization_2/images/tukey_mosteller.png new file mode 100644 index 000000000..6c322a019 Binary files /dev/null and b/docs/visualization_2/images/tukey_mosteller.png differ diff --git a/docs/visualization_2/images/unrevealed_viz.png b/docs/visualization_2/images/unrevealed_viz.png new file mode 100644 index 000000000..f371ed74d Binary files /dev/null and b/docs/visualization_2/images/unrevealed_viz.png differ diff --git a/docs/visualization_2/images/viridis_colormap.png b/docs/visualization_2/images/viridis_colormap.png new file mode 100644 index 000000000..37496838f Binary files /dev/null and b/docs/visualization_2/images/viridis_colormap.png differ diff --git a/docs/visualization_2/images/viridis_perceptually_uniform.png b/docs/visualization_2/images/viridis_perceptually_uniform.png new file mode 100644 index 000000000..266f869ec Binary files /dev/null and b/docs/visualization_2/images/viridis_perceptually_uniform.png differ diff --git a/docs/visualization_2/images/wrong_scale_viz.png b/docs/visualization_2/images/wrong_scale_viz.png new file mode 100644 index 000000000..c6cda3d97 Binary files /dev/null and b/docs/visualization_2/images/wrong_scale_viz.png differ diff --git a/visualization_2/visualization_2.html b/docs/visualization_2/visualization_2.html similarity index 93% rename from 
visualization_2/visualization_2.html rename to docs/visualization_2/visualization_2.html index 6036cc023..416250f8c 100644 --- a/visualization_2/visualization_2.html +++ b/docs/visualization_2/visualization_2.html @@ -572,14 +572,8 @@

kde = True, stat = "density") plt.title("Distribution of HIV rates");

-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
-

+

Notice that the smooth KDE curve is higher when the histogram bins are taller. You can think of the height of the KDE curve as representing how “probable” it is that we randomly sample a datapoint with the corresponding value. This intuitively makes sense – if we have already collected more datapoints with a particular value (resulting in a tall histogram bin), it is more likely that, if we randomly sample another datapoint, we will sample one with a similar value (resulting in a high KDE curve).

@@ -598,14 +592,14 @@

Code -
data = [2.2, 2.8, 3.7, 5.3, 5.7]
-
-sns.rugplot(data, height=0.3)
-
-plt.xlabel("Data")
-plt.ylabel("Density")
-plt.xlim(-3, 10)
-plt.ylim(0, 0.5);
+
data = [2.2, 2.8, 3.7, 5.3, 5.7]
+
+sns.rugplot(data, height=0.3)
+
+plt.xlabel("Data")
+plt.ylabel("Density")
+plt.xlim(-3, 10)
+plt.ylim(0, 0.5);

@@ -615,11 +609,11 @@

Code -
sns.kdeplot(data)
-
-plt.xlabel("Data")
-plt.xlim(-3, 10)
-plt.ylim(0, 0.5);
+
sns.kdeplot(data)
+
+plt.xlabel("Data")
+plt.xlim(-3, 10)
+plt.ylim(0, 0.5);

@@ -634,21 +628,21 @@

Code -
def gaussian_kernel(x, z, a):
-    # We'll discuss where this mathematical formulation came from later
-    return (1/np.sqrt(2*np.pi*a**2)) * np.exp((-(x - z)**2 / (2 * a**2)))
-
-# Plot our datapoint
-sns.rugplot([2.2], height=0.3)
-
-# Plot the kernel
-x = np.linspace(-3, 10, 1000)
-plt.plot(x, gaussian_kernel(x, 2.2, 1))
-
-plt.xlabel("Data")
-plt.ylabel("Density")
-plt.xlim(-3, 10)
-plt.ylim(0, 0.5);
+
def gaussian_kernel(x, z, a):
+    # We'll discuss where this mathematical formulation came from later
+    return (1/np.sqrt(2*np.pi*a**2)) * np.exp((-(x - z)**2 / (2 * a**2)))
+
+# Plot our datapoint
+sns.rugplot([2.2], height=0.3)
+
+# Plot the kernel
+x = np.linspace(-3, 10, 1000)
+plt.plot(x, gaussian_kernel(x, 2.2, 1))
+
+plt.xlabel("Data")
+plt.ylabel("Density")
+plt.xlim(-3, 10)
+plt.ylim(0, 0.5);

@@ -658,41 +652,41 @@

Code -
# You will work with the functions below in Lab 4
-def create_kde(kernel, pts, a):
-    # Takes in a kernel, set of points, and alpha
-    # Returns the KDE as a function
-    def f(x):
-        output = 0
-        for pt in pts:
-            output += kernel(x, pt, a)
-        return output / len(pts) # Normalization factor
-    return f
-
-def plot_kde(kernel, pts, a):
-    # Calls create_kde and plots the corresponding KDE
-    f = create_kde(kernel, pts, a)
-    x = np.linspace(min(pts) - 5, max(pts) + 5, 1000)
-    y = [f(xi) for xi in x]
-    plt.plot(x, y);
-    
-def plot_separate_kernels(kernel, pts, a, norm=False):
-    # Plots individual kernels, which are then summed to create the KDE
-    x = np.linspace(min(pts) - 5, max(pts) + 5, 1000)
-    for pt in pts:
-        y = kernel(x, pt, a)
-        if norm:
-            y /= len(pts)
-        plt.plot(x, y)
-    
-    plt.show();
-    
-plt.xlim(-3, 10)
-plt.ylim(0, 0.5)
-plt.xlabel("Data")
-plt.ylabel("Density")
-
-plot_separate_kernels(gaussian_kernel, data, a = 1)
+
# You will work with the functions below in Lab 4
+def create_kde(kernel, pts, a):
+    # Takes in a kernel, set of points, and alpha
+    # Returns the KDE as a function
+    def f(x):
+        output = 0
+        for pt in pts:
+            output += kernel(x, pt, a)
+        return output / len(pts) # Normalization factor
+    return f
+
+def plot_kde(kernel, pts, a):
+    # Calls create_kde and plots the corresponding KDE
+    f = create_kde(kernel, pts, a)
+    x = np.linspace(min(pts) - 5, max(pts) + 5, 1000)
+    y = [f(xi) for xi in x]
+    plt.plot(x, y);
+    
+def plot_separate_kernels(kernel, pts, a, norm=False):
+    # Plots individual kernels, which are then summed to create the KDE
+    x = np.linspace(min(pts) - 5, max(pts) + 5, 1000)
+    for pt in pts:
+        y = kernel(x, pt, a)
+        if norm:
+            y /= len(pts)
+        plt.plot(x, y)
+    
+    plt.show();
+    
+plt.xlim(-3, 10)
+plt.ylim(0, 0.5)
+plt.xlabel("Data")
+plt.ylabel("Density")
+
+plot_separate_kernels(gaussian_kernel, data, a = 1)

@@ -706,13 +700,13 @@

Code -
plt.xlim(-3, 10)
-plt.ylim(0, 0.5)
-plt.xlabel("Data")
-plt.ylabel("Density")
-
-# The `norm` argument specifies whether or not to normalize the kernels
-plot_separate_kernels(gaussian_kernel, data, a = 1, norm = True)
+
plt.xlim(-3, 10)
+plt.ylim(0, 0.5)
+plt.xlabel("Data")
+plt.ylabel("Density")
+
+# The `norm` argument specifies whether or not to normalize the kernels
+plot_separate_kernels(gaussian_kernel, data, a = 1, norm = True)

@@ -725,12 +719,12 @@

Code -
plt.xlim(-3, 10)
-plt.ylim(0, 0.5)
-plt.xlabel("Data")
-plt.ylabel("Density")
-
-plot_kde(gaussian_kernel, data, a = 1)
+
plt.xlim(-3, 10)
+plt.ylim(0, 0.5)
+plt.xlabel("Data")
+plt.ylabel("Density")
+
+plot_kde(gaussian_kernel, data, a = 1)

@@ -831,13 +825,13 @@

Code -
def boxcar_kernel(alpha, x, z):
-    return (((x-z)>=-alpha/2)&((x-z)<=alpha/2))/alpha
-
-xs = np.linspace(-5, 5, 200)
-alpha=1
-kde_curve = [boxcar_kernel(alpha, x, 0) for x in xs]
-plt.plot(xs, kde_curve);
+
def boxcar_kernel(alpha, x, z):
+    return (((x-z)>=-alpha/2)&((x-z)<=alpha/2))/alpha
+
+xs = np.linspace(-5, 5, 200)
+alpha=1
+kde_curve = [boxcar_kernel(alpha, x, 0) for x in xs]
+plt.plot(xs, kde_curve);
@@ -876,51 +870,33 @@

. Note that here we’ve specified stat = density to normalize the histogram such that the area under the histogram is equal to 1.

-
sns.displot(data=wb, 
-            x="gni", 
-            kind="hist", 
-            stat="density") # default: stat=count and density integrates to 1
-plt.title("Distribution of gross national income per capita");
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
+
sns.displot(data=wb, 
+            x="gni", 
+            kind="hist", 
+            stat="density") # default: stat=count and density integrates to 1
+plt.title("Distribution of gross national income per capita");
-

+

Now, what if we want to generate a KDE plot? We can set kind = to "kde"!

-
sns.displot(data=wb, 
-            x="gni", 
-            kind='kde')
-plt.title("Distribution of gross national income per capita");
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
+
sns.displot(data=wb, 
+            x="gni", 
+            kind='kde')
+plt.title("Distribution of gross national income per capita");
-

+

And finally, if we want to generate an Empirical Cumulative Distribution Function (ECDF), we can specify kind = "ecdf".

-
sns.displot(data=wb, 
-            x="gni", 
-            kind='ecdf')
-plt.title("Cumulative Distribution of gross national income per capita");
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
+
sns.displot(data=wb, 
+            x="gni", 
+            kind='ecdf')
+plt.title("Cumulative Distribution of gross national income per capita");
-

+

@@ -932,23 +908,23 @@

Scatter plots are one of the most useful tools in representing the relationship between pairs of quantitative variables. They are particularly important in gauging the strength, or correlation, of the relationship between variables. Knowledge of these relationships can then motivate decisions in our modeling process.

In matplotlib, we use the function plt.scatter to generate a scatter plot. Notice that, unlike our examples of plotting single-variable distributions, now we specify sequences of values to be plotted along the x-axis and the y-axis.

-
plt.scatter(wb["per capita: % growth: 2016"], \
-            wb['Adult literacy rate: Female: % ages 15 and older: 2005-14'])
-
-plt.xlabel("% growth per capita")
-plt.ylabel("Female adult literacy rate")
-plt.title("Female adult literacy against % growth");
+
plt.scatter(wb["per capita: % growth: 2016"], \
+            wb['Adult literacy rate: Female: % ages 15 and older: 2005-14'])
+
+plt.xlabel("% growth per capita")
+plt.ylabel("Female adult literacy rate")
+plt.title("Female adult literacy against % growth");

In seaborn, we call the function sns.scatterplot. We use the x and y parameters to indicate the values to be plotted along the x and y axes, respectively. By using the hue parameter, we can specify a third variable to be used for coloring each scatter point.

-
sns.scatterplot(data = wb, x = "per capita: % growth: 2016", \
-               y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", 
-               hue = "Continent")
-
-plt.title("Female adult literacy against % growth");
+
sns.scatterplot(data = wb, x = "per capita: % growth: 2016", \
+               y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", 
+               hue = "Continent")
+
+plt.title("Female adult literacy against % growth");

@@ -963,25 +939,25 @@

In the cell below, we first jitter the data using np.random.uniform, then re-plot it with smaller markers. The resulting plot is much easier to interpret.

-
# Setting a seed ensures that we produce the same plot each time
-# This means that the course notes will not change each time you access them
-np.random.seed(150)
-
-# This call to np.random.uniform generates random numbers between -1 and 1
-# We add these random numbers to the original x data to jitter it slightly
-x_noise = np.random.uniform(-1, 1, len(wb))
-jittered_x = wb["per capita: % growth: 2016"] + x_noise
-
-# Repeat for y data
-y_noise = np.random.uniform(-5, 5, len(wb))
-jittered_y = wb["Adult literacy rate: Female: % ages 15 and older: 2005-14"] + y_noise
-
-# Setting the size parameter `s` changes the size of each point
-plt.scatter(jittered_x, jittered_y, s=15)
-
-plt.xlabel("% growth per capita (jittered)")
-plt.ylabel("Female adult literacy rate (jittered)")
-plt.title("Female adult literacy against % growth");
+
# Setting a seed ensures that we produce the same plot each time
+# This means that the course notes will not change each time you access them
+np.random.seed(150)
+
+# This call to np.random.uniform generates random numbers between -1 and 1
+# We add these random numbers to the original x data to jitter it slightly
+x_noise = np.random.uniform(-1, 1, len(wb))
+jittered_x = wb["per capita: % growth: 2016"] + x_noise
+
+# Repeat for y data
+y_noise = np.random.uniform(-5, 5, len(wb))
+jittered_y = wb["Adult literacy rate: Female: % ages 15 and older: 2005-14"] + y_noise
+
+# Setting the size parameter `s` changes the size of each point
+plt.scatter(jittered_x, jittered_y, s=15)
+
+plt.xlabel("% growth per capita (jittered)")
+plt.ylabel("Female adult literacy rate (jittered)")
+plt.title("Female adult literacy against % growth");

@@ -993,30 +969,24 @@

seaborn also includes several built-in functions for creating more sophisticated scatter plots. Two of the most commonly used examples are sns.lmplot and sns.jointplot.

sns.lmplot plots both a scatter plot and a linear regression line, all in one function call. We’ll discuss linear regression in a few lectures.

-
sns.lmplot(data = wb, x = "per capita: % growth: 2016", \
-           y = "Adult literacy rate: Female: % ages 15 and older: 2005-14")
-
-plt.title("Female adult literacy against % growth");
-
-
/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning:
-
-The figure layout has changed to tight
-
-
+
sns.lmplot(data = wb, x = "per capita: % growth: 2016", \
+           y = "Adult literacy rate: Female: % ages 15 and older: 2005-14")
+
+plt.title("Female adult literacy against % growth");
-

+

sns.jointplot creates a visualization with three components: a scatter plot, a histogram of the distribution of x values, and a histogram of the distribution of y values.

-
sns.jointplot(data = wb, x = "per capita: % growth: 2016", \
-           y = "Adult literacy rate: Female: % ages 15 and older: 2005-14")
-
-# plt.suptitle allows us to shift the title up so it does not overlap with the histogram
-plt.suptitle("Female adult literacy against % growth")
-plt.subplots_adjust(top=0.9);
+
sns.jointplot(data = wb, x = "per capita: % growth: 2016", \
+           y = "Adult literacy rate: Female: % ages 15 and older: 2005-14")
+
+# plt.suptitle allows us to shift the title up so it does not overlap with the histogram
+plt.suptitle("Female adult literacy against % growth")
+plt.subplots_adjust(top=0.9);
-

+

@@ -1026,15 +996,15 @@

Hex plots can be thought of as two-dimensional histograms that show the joint distribution between two variables. This is particularly useful when working with very dense data. In a hex plot, the x-y plane is binned into hexagons. Hexagons that are darker in color indicate a greater density of data – that is, there are more data points that lie in the region enclosed by the hexagon.

We can generate a hex plot using sns.jointplot modified with the kind parameter.

-
sns.jointplot(data = wb, x = "per capita: % growth: 2016", \
-              y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", \
-              kind = "hex")
-
-# plt.suptitle allows us to shift the title up so it does not overlap with the histogram
-plt.suptitle("Female adult literacy against % growth")
-plt.subplots_adjust(top=0.9);
+
sns.jointplot(data = wb, x = "per capita: % growth: 2016", \
+              y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", \
+              kind = "hex")
+
+# plt.suptitle allows us to shift the title up so it does not overlap with the histogram
+plt.suptitle("Female adult literacy against % growth")
+plt.subplots_adjust(top=0.9);
-

+

@@ -1043,11 +1013,11 @@

Contour plots are an alternative way of plotting the joint distribution of two variables. You can think of them as the 2-dimensional versions of KDE plots. A contour plot can be interpreted in a similar way to a topographic map. Each contour line represents an area that has the same density of datapoints throughout the region. Contours marked with darker colors contain more datapoints (a higher density) in that region.

sns.kdeplot will generate a contour plot if we specify both x and y data.

-
sns.kdeplot(data = wb, x = "per capita: % growth: 2016", \
-            y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", \
-            fill = True)
-
-plt.title("Female adult literacy against % growth");
+
sns.kdeplot(data = wb, x = "per capita: % growth: 2016", \
+            y = "Adult literacy rate: Female: % ages 15 and older: 2005-14", \
+            fill = True)
+
+plt.title("Female adult literacy against % growth");

@@ -1063,17 +1033,17 @@

Code -
# Some data cleaning to help with the next example
-df = pd.DataFrame(index=wb.index)
-df['lit'] = wb['Adult literacy rate: Female: % ages 15 and older: 2005-14'] \
-            + wb["Adult literacy rate: Male: % ages 15 and older: 2005-14"]
-df['inc'] = wb['gni']
-df.dropna(inplace=True)
-
-plt.scatter(df["inc"], df["lit"])
-plt.xlabel("Gross national income per capita")
-plt.ylabel("Adult literacy rate")
-plt.title("Adult literacy rate against GNI per capita");
+
# Some data cleaning to help with the next example
+df = pd.DataFrame(index=wb.index)
+df['lit'] = wb['Adult literacy rate: Female: % ages 15 and older: 2005-14'] \
+            + wb["Adult literacy rate: Male: % ages 15 and older: 2005-14"]
+df['inc'] = wb['gni']
+df.dropna(inplace=True)
+
+plt.scatter(df["inc"], df["lit"])
+plt.xlabel("Gross national income per capita")
+plt.ylabel("Adult literacy rate")
+plt.title("Adult literacy rate against GNI per capita");

@@ -1101,12 +1071,12 @@

\(\log{(100)} = 4.61\) and \(\log{(10)} = 2.3\)).

In Data 100 (and most upper-division STEM classes), \(\log\) is used to refer to the natural logarithm with base \(e\).

-
# np.log takes the logarithm of an array or Series
-plt.scatter(np.log(df["inc"]), df["lit"])
-
-plt.xlabel("Log(gross national income per capita)")
-plt.ylabel("Adult literacy rate")
-plt.title("Adult literacy rate against Log(GNI per capita)");
+
# np.log takes the logarithm of an array or Series
+plt.scatter(np.log(df["inc"]), df["lit"])
+
+plt.xlabel("Log(gross national income per capita)")
+plt.ylabel("Adult literacy rate")
+plt.title("Adult literacy rate against Log(GNI per capita)");

@@ -1120,13 +1090,13 @@

\(2^4 = 16\) and \(200^4 = 1600000000\)).

-
# Apply a log transformation to the x values and a power transformation to the y values
-plt.scatter(np.log(df["inc"]), df["lit"]**4)
-
-plt.xlabel("Log(gross national income per capita)")
-plt.ylabel("Adult literacy rate (4th power)")
-plt.suptitle("Adult literacy rate (4th power) against Log(GNI per capita)")
-plt.subplots_adjust(top=0.9);
+
# Apply a log transformation to the x values and a power transformation to the y values
+plt.scatter(np.log(df["inc"]), df["lit"]**4)
+
+plt.xlabel("Log(gross national income per capita)")
+plt.ylabel("Adult literacy rate (4th power)")
+plt.suptitle("Adult literacy rate (4th power) against Log(GNI per capita)")
+plt.subplots_adjust(top=0.9);

@@ -1139,26 +1109,26 @@

Code -
# The code below fits a linear regression model. We'll discuss it at length in a future lecture
-from sklearn.linear_model import LinearRegression
-
-model = LinearRegression()
-model.fit(np.log(df[["inc"]]), df["lit"]**4)
-m, b = model.coef_[0], model.intercept_
-
-print(f"The slope, m, of the transformed data is: {m}")
-print(f"The intercept, b, of the transformed data is: {b}")
-
-df = df.sort_values("inc")
-plt.scatter(np.log(df["inc"]), df["lit"]**4, label="Transformed data")
-plt.plot(np.log(df["inc"]), m*np.log(df["inc"])+b, c="red", label="Linear regression")
-plt.xlabel("Log(gross national income per capita)")
-plt.ylabel("Adult literacy rate (4th power)")
-plt.legend();
+
# The code below fits a linear regression model. We'll discuss it at length in a future lecture
+from sklearn.linear_model import LinearRegression
+
+model = LinearRegression()
+model.fit(np.log(df[["inc"]]), df["lit"]**4)
+m, b = model.coef_[0], model.intercept_
+
+print(f"The slope, m, of the transformed data is: {m}")
+print(f"The intercept, b, of the transformed data is: {b}")
+
+df = df.sort_values("inc")
+plt.scatter(np.log(df["inc"]), df["lit"]**4, label="Transformed data")
+plt.plot(np.log(df["inc"]), m*np.log(df["inc"])+b, c="red", label="Linear regression")
+plt.xlabel("Log(gross national income per capita)")
+plt.ylabel("Adult literacy rate (4th power)")
+plt.legend();
-
The slope, m, of the transformed data is: 336400693.43172693
-The intercept, b, of the transformed data is: -1802204836.0479977
+
The slope, m, of the transformed data is: 336400693.43172705
+The intercept, b, of the transformed data is: -1802204836.0479987

@@ -1173,12 +1143,12 @@

Code -
# Now, plug the values for m and b into the relationship between the untransformed x and y
-plt.scatter(df["inc"], df["lit"], label="Untransformed data")
-plt.plot(df["inc"], (m*np.log(df["inc"])+b)**(1/4), c="red", label="Modeled relationship")
-plt.xlabel("Gross national income per capita")
-plt.ylabel("Adult literacy rate")
-plt.legend();
+
# Now, plug the values for m and b into the relationship between the untransformed x and y
+plt.scatter(df["inc"], df["lit"], label="Untransformed data")
+plt.plot(df["inc"], (m*np.log(df["inc"])+b)**(1/4), c="red", label="Modeled relationship")
+plt.xlabel("Gross national income per capita")
+plt.ylabel("Adult literacy rate")
+plt.legend();

diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-1.png new file mode 100644 index 000000000..5a833e4e2 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-2.png new file mode 100644 index 000000000..713bcd7a1 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-10-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-11-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-11-output-1.png new file mode 100644 index 000000000..462735239 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-11-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-12-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-12-output-1.png new file mode 100644 index 000000000..4bf04ac85 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-12-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-1.png new file mode 100644 index 000000000..49a309a82 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-2.png new file mode 100644 index 000000000..f26551da4 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-13-output-2.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-14-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-14-output-1.png new file mode 100644 index 000000000..589bdf461 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-14-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-15-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-15-output-1.png new file mode 100644 index 000000000..13d349515 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-15-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-16-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-16-output-1.png new file mode 100644 index 000000000..a8fb882c9 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-16-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-17-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-17-output-1.png new file mode 100644 index 000000000..ccb300f19 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-17-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png new file mode 100644 index 000000000..e0f6a9536 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-2.png new file mode 100644 index 000000000..5ac500685 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-18-output-2.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-1.png new file mode 100644 index 000000000..9ad3a069d Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-2.png new file mode 100644 index 000000000..19f726c4b Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-19-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-1.png new file mode 100644 index 000000000..fab9a8c51 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-2.png new file mode 100644 index 000000000..f26551da4 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-3.png b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-3.png new file mode 100644 index 000000000..143fbba29 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-20-output-3.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-21-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-21-output-1.png new file mode 100644 index 000000000..fc02b67d6 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-21-output-1.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-22-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-22-output-1.png new file mode 100644 index 000000000..024bfbca4 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-22-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-23-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-23-output-1.png new file mode 100644 index 000000000..c09460e81 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-23-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-1.png new file mode 100644 index 000000000..780f3b616 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-2.png new file mode 100644 index 000000000..ba6f6b017 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-24-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-25-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-25-output-1.png new file mode 100644 index 000000000..994192f3e Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-25-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-26-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-26-output-1.png new file mode 100644 index 000000000..221442b1d Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-26-output-1.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-27-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-27-output-1.png new file mode 100644 index 000000000..972bfb172 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-27-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-28-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-28-output-1.png new file mode 100644 index 000000000..bb029adc5 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-28-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-29-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-29-output-1.png new file mode 100644 index 000000000..3754b3776 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-29-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-3-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-3-output-1.png new file mode 100644 index 000000000..610626314 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-3-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-30-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-30-output-1.png new file mode 100644 index 000000000..488b5b595 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-30-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-1.png new file mode 100644 index 000000000..68c761ad3 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-1.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-2.png new file mode 100644 index 000000000..dc53abe15 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-31-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-1.png new file mode 100644 index 000000000..fe8cc273f Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-2.png b/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-2.png new file mode 100644 index 000000000..84e90a931 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-32-output-2.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-33-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-33-output-1.png new file mode 100644 index 000000000..04301b1ef Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-33-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-4-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-4-output-1.png new file mode 100644 index 000000000..3c68e6497 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-4-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-5-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-5-output-1.png new file mode 100644 index 000000000..185c1b768 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-5-output-1.png differ diff --git 
a/docs/visualization_2/visualization_2_files/figure-html/cell-6-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-6-output-1.png new file mode 100644 index 000000000..ff012f3a9 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-6-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-7-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-7-output-1.png new file mode 100644 index 000000000..06af8ea74 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-7-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-8-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-8-output-1.png new file mode 100644 index 000000000..9f71a6727 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-8-output-1.png differ diff --git a/docs/visualization_2/visualization_2_files/figure-html/cell-9-output-1.png b/docs/visualization_2/visualization_2_files/figure-html/cell-9-output-1.png new file mode 100644 index 000000000..827062832 Binary files /dev/null and b/docs/visualization_2/visualization_2_files/figure-html/cell-9-output-1.png differ diff --git a/index.log b/index.log index 5ff6e7ccf..d330f3006 100644 --- a/index.log +++ b/index.log @@ -1,4 +1,4 @@ -This is XeTeX, Version 3.141592653-2.6-0.999995 (TeX Live 2023) (preloaded format=xelatex 2024.3.3) 2 APR 2024 22:01 +This is XeTeX, Version 3.141592653-2.6-0.999995 (TeX Live 2023) (preloaded format=xelatex 2024.2.25) 4 APR 2024 13:56 entering extended mode restricted \write18 enabled. %&-line parsing enabled. 
@@ -6,25 +6,25 @@ entering extended mode (./index.tex LaTeX2e <2023-11-01> patch level 1 L3 programming layer <2024-02-20> -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrreprt.cls +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrreprt.cls Document Class: scrreprt 2023/07/07 v3.41 KOMA-Script document class (report) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrkbase.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrkbase.sty Package: scrkbase 2023/07/07 v3.41 KOMA-Script package (KOMA-Script-dependent basics and keyval usage) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrbase.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrbase.sty Package: scrbase 2023/07/07 v3.41 KOMA-Script package (KOMA-Script-independent basics and keyval usage) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlfile.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlfile.sty Package: scrlfile 2023/07/07 v3.41 KOMA-Script package (file load hooks) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlfile-hook.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlfile-hook.sty Package: scrlfile-hook 2023/07/07 v3.41 KOMA-Script package (using LaTeX hooks) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlogo.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrlogo.sty Package: scrlogo 2023/07/07 v3.41 KOMA-Script package (logo) -))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/keyval.sty +))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/keyval.sty Package: keyval 2022/05/29 v1.15 key=value parser (DPC) \KV@toks@=\toks17 ) Applying: [2021/05/01] Usage of raw or classic option list on input line 252. Already applied: [0000/00/00] Usage of raw or classic option list on input line 368. 
-)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/tocbasic.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/tocbasic.sty Package: tocbasic 2023/07/07 v3.41 KOMA-Script package (handling toc-files) \scr@dte@tocline@numberwidth=\skip48 \scr@dte@tocline@numbox=\box51 @@ -34,9 +34,9 @@ Package tocbasic Info: omitting babel extension for `toc' (tocbasic) for `toc' on input line 135. Class scrreprt Info: File `scrsize11pt.clo' used instead of (scrreprt) file `scrsize11.clo' to setup font sizes on input line 2688. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrsize11pt.clo +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/scrsize11pt.clo File: scrsize11pt.clo 2023/07/07 v3.41 KOMA-Script font size class option (11pt) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/koma-script/typearea.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/koma-script/typearea.sty Package: typearea 2023/07/07 v3.41 KOMA-Script package (type area) \ta@bcor=\skip49 \ta@div=\count184 @@ -121,20 +121,20 @@ Package tocbasic Info: omitting babel extension for `lot' \c@table=\count193 Class scrreprt Info: Redefining `\numberline' on input line 7428. \bibindent=\dimen140 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsmath.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsmath.sty Package: amsmath 2023/05/13 v2.17o AMS math features \@mathmargin=\skip65 For additional information on amsmath, use the `?' option. 
-(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amstext.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amstext.sty Package: amstext 2021/08/26 v2.01 AMS text -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsgen.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsgen.sty File: amsgen.sty 1999/11/30 v2.0 generic functions \@emptytoks=\toks18 \ex@=\dimen141 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsbsy.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsbsy.sty Package: amsbsy 1999/11/29 v1.2d Bold Symbols \pmbraise@=\dimen142 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsopn.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsopn.sty Package: amsopn 2022/04/08 v2.04 operator names ) \inf@bad=\count194 @@ -184,20 +184,20 @@ LaTeX Info: Redefining \Relbar on input line 971. \mathdisplay@stack=\toks22 LaTeX Info: Redefining \[ on input line 2953. LaTeX Info: Redefining \] on input line 2954. -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amssymb.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amssymb.sty Package: amssymb 2013/01/14 v3.01 AMS font symbols -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amsfonts.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amsfonts.sty Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support \symAMSa=\mathgroup4 \symAMSb=\mathgroup5 LaTeX Font Info: Redeclaring math symbol \hbar on input line 98. LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' (Font) U/euf/m/n --> U/euf/b/n on input line 106. 
-)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/iftex/iftex.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/iftex/iftex.sty Package: iftex 2022/02/03 v1.0f TeX engine tests -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/l3kernel/expl3.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/l3kernel/expl3.sty Package: expl3 2024-02-20 L3 programming layer (loader) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/l3backend/l3backend-xetex.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/l3backend/l3backend-xetex.def File: l3backend-xetex.def 2024-02-20 L3 backend support: XeTeX \g__graphics_track_int=\count272 \l__pdf_internal_box=\box55 @@ -206,15 +206,15 @@ File: l3backend-xetex.def 2024-02-20 L3 backend support: XeTeX \g__pdf_backend_link_int=\count275 )) Package: unicode-math 2023/08/13 v0.8r Unicode maths in XeLaTeX and LuaLaTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math-xetex.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math-xetex.sty Package: unicode-math-xetex 2023/08/13 v0.8r Unicode maths in XeLaTeX and LuaLaTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/l3packages/xparse/xparse.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/l3packages/xparse/xparse.sty Package: xparse 2024-02-18 L3 Experimental document command parser -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty Package: l3keys2e 2024-02-18 LaTeX2e option processing using LaTeX3 keys -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.sty Package: 
fontspec 2024/02/13 v2.9a Font selection for XeLaTeX and LuaLaTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty Package: fontspec-xetex 2024/02/13 v2.9a Font selection for XeLaTeX and LuaLaTeX \l__fontspec_script_int=\count276 \l__fontspec_language_int=\count277 @@ -230,11 +230,11 @@ Package: fontspec-xetex 2024/02/13 v2.9a Font selection for XeLaTeX and LuaLaTeX \l__fontspec_tmpa_dim=\dimen150 \l__fontspec_tmpb_dim=\dimen151 \l__fontspec_tmpc_dim=\dimen152 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/fontenc.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/fontenc.sty Package: fontenc 2021/04/29 v2.0v Standard LaTeX package -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/fix-cm.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/fix-cm.sty Package: fix-cm 2020/11/24 v1.1t fixes to LaTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/ts1enc.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/ts1enc.def File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file LaTeX Font Info: Redeclaring font encoding TS1 on input line 47. )) @@ -242,7 +242,7 @@ LaTeX Font Info: Redeclaring font encoding TS1 on input line 47. 
\g__um_fonts_used_int=\count288 \l__um_primecount_int=\count289 \g__um_primekern_muskip=\muskip17 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math-table.tex))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/lm/lmodern.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/unicode-math/unicode-math-table.tex))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/lm/lmodern.sty Package: lmodern 2015/05/01 v1.6.1 Latin Modern Fonts LaTeX Font Info: Overwriting symbol font `operators' in version `normal' (Font) OT1/cmr/m/n --> OT1/lmr/m/n on input line 22. @@ -276,13 +276,13 @@ LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' (Font) OT1/cmr/bx/it --> OT1/lmr/bx/it on input line 37. LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' (Font) OT1/cmtt/m/n --> OT1/lmtt/m/n on input line 38. -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/upquote/upquote.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/upquote/upquote.sty Package: upquote 2012/04/19 v1.3 upright-quote and grave-accent glyphs in verbatim -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/textcomp.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/textcomp.sty Package: textcomp 2020/02/02 v2.0n Standard LaTeX package -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype.sty Package: microtype 2023/03/13 v3.1a Micro-typographical refinements (RS) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/etoolbox/etoolbox.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/etoolbox/etoolbox.sty Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW) \etb@tempcnta=\count290 ) @@ -297,22 +297,22 @@ LaTeX Info: Redefining \textls on input line 1368. \MT@outer@kern=\dimen153 LaTeX Info: Redefining \textmicrotypecontext on input line 1988. 
\MT@listname@count=\count292 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype-xetex.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype-xetex.def File: microtype-xetex.def 2023/03/13 v3.1a Definitions specific to xetex (RS) LaTeX Info: Redefining \lsstyle on input line 238. ) Package microtype Info: Loading configuration file microtype.cfg. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype.cfg +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/microtype.cfg File: microtype.cfg 2023/03/13 v3.1a microtype main configuration file (RS) -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/xcolor/xcolor.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/xcolor/xcolor.sty Package: xcolor 2023/11/15 v3.01 LaTeX color extensions (UK) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/color.cfg +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/color.cfg File: color.cfg 2016/01/02 v1.6 sample color configuration ) Package xcolor Info: Driver file: xetex.def on input line 274. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics-def/xetex.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics-def/xetex.def File: xetex.def 2022/09/22 v5.0n Graphics/color driver for xetex -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/mathcolor.ltx) +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/mathcolor.ltx) Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1350. Package xcolor Info: Model `RGB' extended on input line 1366. Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1368. @@ -321,13 +321,13 @@ Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1370. Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1371. Package xcolor Info: Model `Gray' substituted by `gray' on input line 1372. 
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1373. -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/dvipsnam.def +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/dvipsnam.def File: dvipsnam.def 2016/06/17 v3.0m Driver-dependent file (DPC,SPQR) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/xcolor/svgnam.def +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/xcolor/svgnam.def File: svgnam.def 2023/11/15 v3.01 Predefined colors according to SVG 1.1 (UK) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/xcolor/x11nam.def +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/xcolor/x11nam.def File: x11nam.def 2023/11/15 v3.01 Predefined colors according to Unix/X11 (UK) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fancyvrb/fancyvrb.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fancyvrb/fancyvrb.sty Package: fancyvrb 2024/01/20 4.5c verbatim text (tvz,hv) \FV@CodeLineNo=\count293 \FV@InFile=\read2 @@ -335,14 +335,14 @@ Package: fancyvrb 2024/01/20 4.5c verbatim text (tvz,hv) \c@FancyVerbLine=\count294 \FV@StepNumber=\count295 \FV@OutFile=\write3 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/framed/framed.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/framed/framed.sty Package: framed 2011/10/22 v 0.96: framed or shaded text with page breaks \OuterFrameSep=\skip68 \fb@frw=\dimen154 \fb@frh=\dimen155 \FrameRule=\dimen156 \FrameSep=\dimen157 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tools/longtable.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tools/longtable.sty Package: longtable 2023-11-01 v4.19 Multi-page Table package (DPC) \LTleft=\skip69 \LTright=\skip70 @@ -362,7 +362,7 @@ Package: longtable 2023-11-01 v4.19 Multi-page Table package (DPC) \LT@p@ftn=\toks25 ) Class scrreprt Info: longtable captions redefined on input line 98. 
-(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/booktabs/booktabs.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/booktabs/booktabs.sty Package: booktabs 2020/01/12 v1.61803398 Publication quality tables \heavyrulewidth=\dimen159 \lightrulewidth=\dimen160 @@ -381,7 +381,7 @@ Package: booktabs 2020/01/12 v1.61803398 Publication quality tables \@thisruleclass=\count303 \@lastruleclass=\count304 \@thisrulewidth=\dimen171 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tools/array.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tools/array.sty Package: array 2023/10/16 v2.5g Tabular extension package (FMi) \col@sep=\dimen172 \ar@mcellbox=\box63 @@ -390,7 +390,7 @@ Package: array 2023/10/16 v2.5g Tabular extension package (FMi) \extratabsurround=\skip73 \backup@length=\skip74 \ar@cellbox=\box64 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tools/calc.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tools/calc.sty Package: calc 2023/07/08 v4.3 Infix arithmetic (KKT,FJ) \calc@Acount=\count305 \calc@Bcount=\count306 @@ -402,41 +402,41 @@ LaTeX Info: Redefining \setlength on input line 80. LaTeX Info: Redefining \addtolength on input line 81. 
\calc@Ccount=\count307 \calc@Cskip=\skip77 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/mdwtools/footnote.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/mdwtools/footnote.sty Package: footnote 1997/01/28 1.13 Save footnotes around boxes \fn@notes=\box65 \fn@width=\dimen176 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphicx.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphicx.sty Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphics.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphics.sty Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics/trig.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics/trig.sty Package: trig 2021/08/11 v1.11 sin cos tan (DPC) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/graphics.cfg +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/graphics.cfg File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration ) Package graphics Info: Driver file: xetex.def on input line 107. 
) \Gin@req@height=\dimen177 \Gin@req@width=\dimen178 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcolorbox.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcolorbox.sty Package: tcolorbox 2024/01/10 version 6.2.0 text color boxes -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex \pgfutil@everybye=\toks27 \pgfutil@tempdima=\dimen179 \pgfutil@tempdimb=\dimen180 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def \pgfutil@abb=\box66 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/pgf.revision.tex) +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/pgf.revision.tex) Package: pgfrcs 2023-01-15 v3.1.10 (3.1.10) )) Package: pgf 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex Package: 
pgfsys 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex \pgfkeys@pathtoks=\toks28 \pgfkeys@temptoks=\toks29 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex \pgfkeys@tmptoks=\toks30 )) \pgf@x=\dimen181 @@ -459,36 +459,36 @@ Package: pgfsys 2023-01-15 v3.1.10 (3.1.10) \t@pgf@tokb=\toks32 \t@pgf@tokc=\toks33 \pgf@sys@id@count=\count312 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10) ) Driver file for pgf: pgfsys-xetex.def -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-xetex.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-xetex.def File: pgfsys-xetex.def 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-dvipdfmx.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-dvipdfmx.def File: pgfsys-dvipdfmx.def 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10) ) \pgfsys@objnum=\count313 -))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex +))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfsyssoftpath@smallbuffer@items=\count314 \pgfsyssoftpath@bigbuffer@items=\count315 -) 
(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10) -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex Package: pgfcore 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex \pgfmath@dimen=\dimen191 \pgfmath@count=\count316 \pgfmath@box=\box67 \pgfmath@toks=\toks34 \pgfmath@stack@operand=\toks35 \pgfmath@stack@operation=\toks36 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) 
(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex \c@pgfmathroundto@lastzeros=\count317 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@picminx=\dimen192 \pgf@picmaxx=\dimen193 @@ -504,74 +504,74 @@ File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@yy=\dimen259 
\pgf@zx=\dimen260 \pgf@zy=\dimen261 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@path@lastx=\dimen262 \pgf@path@lasty=\dimen263 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@shorten@end@additional=\dimen264 \pgf@shorten@start@additional=\dimen265 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfpic=\box68 \pgf@hbox=\box69 \pgf@layerbox@main=\box70 \pgf@picture@serial@count=\count318 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgflinewidth=\dimen266 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@pt@x=\dimen267 \pgf@pt@y=\dimen268 \pgf@pt@temp=\dimen269 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10) -) 
(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex File: pgfcoreobjects.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfarrowsep=\dimen270 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@max=\dimen271 \pgf@sys@shading@range@num=\count319 \pgf@shadingcount=\count320 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex File: pgfcoreexternal.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfexternal@startupbox=\box71 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex File: pgfcorelayers.code.tex 2023-01-15 v3.1.10 (3.1.10) -) 
(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex File: pgfcoretransparency.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex File: pgfcorepatterns.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex File: pgfcorerdf.code.tex 2023-01-15 v3.1.10 (3.1.10) -))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex +))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex File: pgfmoduleshapes.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfnodeparttextbox=\box72 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10) \pgf@nodesepstart=\dimen272 \pgf@nodesepend=\dimen273 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10) -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tools/verbatim.sty +)) 
(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tools/verbatim.sty Package: verbatim 2023-11-06 v1.5v LaTeX2e package for verbatim enhancements \every@verbatim=\toks37 \verbatim@line=\toks38 \verbatim@in@stream=\read4 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/environ/environ.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/environ/environ.sty Package: environ 2014/05/04 v0.3 A new way to define environments -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/trimspaces/trimspaces.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/trimspaces/trimspaces.sty Package: trimspaces 2009/09/17 v1.1 Trim spaces around a token list )) \tcb@titlebox=\box73 @@ -587,17 +587,17 @@ Package: trimspaces 2009/09/17 v1.1 Trim spaces around a token list \tcb@temp=\box79 \tcb@temp=\box80 \tcb@temp=\box81 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbskins.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbskins.code.tex Library (tcolorbox): 'tcbskins.code.tex' version '6.2.0' -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) 
(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex Package: pgffor 2023-01-15 v3.1.10 (3.1.10) \pgffor@iter=\dimen275 \pgffor@skip=\dimen276 \pgffor@stack=\toks39 \pgffor@toks=\toks40 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex Package: tikz 2023-01-15 v3.1.10 (3.1.10) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgf@plot@mark@count=\count324 \pgfplotmarksize=\dimen277 @@ -618,31 +618,31 @@ File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10) \tikznumberofchildren=\count326 \tikznumberofcurrentchild=\count327 \tikz@fig@count=\count328 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10) \pgfmatrixcurrentrow=\count329 \pgfmatrixcurrentcolumn=\count330 \pgf@matrix@numberofcolumns=\count331 ) \tikz@expandcount=\count332 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10) -))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzfill.image.sty +))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzfill.image.sty 
Package: tikzfill.image 2023/08/08 v1.0.1 Image filling library for TikZ -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzfill-common.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzfill-common.sty Package: tikzfill-common 2023/08/08 v1.0.1 Auxiliary code for tikzfill -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzlibraryfill.image.code.tex +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tikzfill/tikzlibraryfill.image.code.tex File: tikzlibraryfill.image.code.tex 2023/08/08 v1.0.1 Image filling library \l__tikzfill_img_box=\box86 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbskinsjigsaw.code.tex +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbskinsjigsaw.code.tex Library (tcolorbox): 'tcbskinsjigsaw.code.tex' version '6.2.0' -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbbreakable.code.tex +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcbbreakable.code.tex Library (tcolorbox): 'tcbbreakable.code.tex' version '6.2.0' -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/pdfcol/pdfcol.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/pdfcol/pdfcol.sty Package: pdfcol 2022-09-21 v1.7 Handle new color stacks for pdfTeX (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty Package: ltxcmds 2023-12-04 v1.26 LaTeX kernel commands for general use (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/infwarerr/infwarerr.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/infwarerr/infwarerr.sty Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO) ) Package pdfcol Info: Interface disabled because of missing PDF mode of pdfTeX. @@ -651,39 +651,39 @@ Package pdfcol Info: pdfTeX's color stacks are not available. 
\tcb@testbox=\box87 \tcb@totalupperbox=\box88 \tcb@totallowerbox=\box89 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/fontawesome5.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/fontawesome5.sty Package: fontawesome5 2022/05/02 v5.15.4 Font Awesome 5 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/fontawesome5-utex-helper.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/fontawesome5-utex-helper.sty Package: fontawesome5-utex-helper 2022/05/02 v5.15.4 uTeX helper for fontawesome5 LaTeX Font Info: Trying to load font information for TU+fontawesomefree on input line 69. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/tufontawesomefree.fd) +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/tufontawesomefree.fd) LaTeX Font Info: Trying to load font information for TU+fontawesomebrands on input line 70. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/tufontawesomebrands.fd))) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/bookmark/bookmark.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/fontawesome5/tufontawesomebrands.fd))) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/bookmark/bookmark.sty Package: bookmark 2023-12-10 v1.31 PDF bookmarks (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hyperref.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hyperref.sty Package: hyperref 2024-01-20 v7.01h Hypertext links for LaTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty Package: kvsetkeys 2022-10-05 v1.19 Key value parser (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty Package: kvdefinekeys 2019-12-19 v1.6 Define 
keys (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pdfescape/pdfescape.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pdfescape/pdfescape.sty Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO) Package pdftexcmds Info: \pdf@primitive is available. Package pdftexcmds Info: \pdf@ifprimitive is available. Package pdftexcmds Info: \pdfdraftmode not found. -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hycolor/hycolor.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hycolor/hycolor.sty Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/auxhook/auxhook.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/auxhook/auxhook.sty Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hyperref/nameref.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hyperref/nameref.sty Package: nameref 2023-11-26 v2.56 Cross-referencing by name of section -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/refcount/refcount.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/refcount/refcount.sty Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/kvoptions/kvoptions.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/kvoptions/kvoptions.sty Package: 
kvoptions 2022-06-15 v3.15 Key value format for package options (HO) )) \c@section@level=\count333 @@ -691,13 +691,13 @@ Package: kvoptions 2022-06-15 v3.15 Key value format for package options (HO) \@linkdim=\dimen286 \Hy@linkcounter=\count334 \Hy@pagecounter=\count335 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hyperref/pd1enc.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hyperref/pd1enc.def File: pd1enc.def 2024-01-20 v7.01h Hyperref: PDFDocEncoding definition (HO) -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/intcalc/intcalc.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/intcalc/intcalc.sty Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO) ) \Hy@SavedSpaceFactor=\count336 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hyperref/puenc.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hyperref/puenc.def File: puenc.def 2024-01-20 v7.01h Hyperref: PDF Unicode definition (HO) ) Package hyperref Info: Option `unicode' set `true' on input line 4062. @@ -709,15 +709,15 @@ Package hyperref Info: Backreferencing OFF on input line 4199. Package hyperref Info: Implicit mode ON; LaTeX internals redefined. Package hyperref Info: Bookmarks ON on input line 4446. \c@Hy@tempcnt=\count337 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/url/url.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/url/url.sty \Urlmuskip=\muskip18 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. ) LaTeX Info: Redefining \url on input line 4784. 
\XeTeXLinkMargin=\dimen287 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/bitset/bitset.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/bitset/bitset.sty Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO) )) \Fld@menulength=\count338 @@ -730,7 +730,7 @@ Package hyperref Info: backreferencing OFF on input line 6078. Package hyperref Info: Link coloring OFF on input line 6083. Package hyperref Info: Link coloring with OCG OFF on input line 6088. Package hyperref Info: PDF/A mode OFF on input line 6093. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/atbegshi-ltx.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/atbegshi-ltx.sty Package: atbegshi-ltx 2021/01/10 v1.0c Emulation of the original atbegshi package with kernel methods ) @@ -739,9 +739,9 @@ package with kernel methods \c@Hfootnote=\count341 ) Package hyperref Info: Driver (autodetected): hxetex. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hxetex.def +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hxetex.def File: hxetex.def 2024-01-20 v7.01h Hyperref driver for XeTeX -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/stringenc/stringenc.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/stringenc/stringenc.sty Package: stringenc 2019/11/29 v1.12 Convert strings between diff. encodings (HO) ) \pdfm@box=\box90 @@ -749,23 +749,23 @@ Package: stringenc 2019/11/29 v1.12 Convert strings between diff. 
encodings (HO) \HyField@AnnotCount=\count343 \Fld@listcount=\count344 \c@bookmark@seq@number=\count345 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty Package: rerunfilecheck 2022-07-10 v1.10 Rerun checks for auxiliary files (HO) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/base/atveryend-ltx.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/base/atveryend-ltx.sty Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atveryend package with kernel methods -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO) ) Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 285. ) \Hy@SectionHShift=\skip78 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/bookmark/bkm-dvipdfm.def +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/bookmark/bkm-dvipdfm.def File: bkm-dvipdfm.def 2023-12-10 v1.31 bookmark driver for dvipdfm (HO) \BKM@id=\count346 -)) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/caption/caption.sty +)) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/caption/caption.sty Package: caption 2023/08/05 v3.6o Customizing captions (AR) -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/caption/caption3.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/caption/caption3.sty Package: caption3 2023/07/31 v2.4d caption3 kernel (AR) \caption@tempdima=\dimen290 \captionmargin=\dimen291 @@ -776,20 +776,20 @@ Package: caption3 2023/07/31 v2.4d caption3 kernel (AR) \caption@parindent=\dimen296 \caption@hangindent=\dimen297 Package caption Info: KOMA-Script document class detected. 
-(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/caption/caption-koma.sto +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/caption/caption-koma.sto File: caption-koma.sto 2023/09/08 v2.0e Adaption of the caption package to the KOMA-Script document classes (AR) )) \c@caption@flags=\count347 \c@continuedfloat=\count348 Package caption Info: hyperref package is loaded. Package caption Info: longtable package is loaded. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/caption/ltcaption.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/caption/ltcaption.sty Package: ltcaption 2021/01/08 v1.4c longtable captions (AR) ) Package caption Info: KOMA-Script scrextend package detected. \caption@addmargin@hsize=\dimen298 \caption@addmargin@linewidth=\dimen299 -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/float/float.sty +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/float/float.sty Package: float 2001/11/08 v1.3d Float enhancements (AL) \c@float@type=\count349 \float@exts=\toks41 @@ -799,7 +799,7 @@ Package: float 2001/11/08 v1.3d Float enhancements (AL) ) \@float@every@codelisting=\toks43 \c@codelisting=\count350 -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/caption/subcaption.sty +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/caption/subcaption.sty Package: subcaption 2023/07/28 v1.6b Sub-captions (AR) Package caption Info: New subtype `subfigure' on input line 238. \c@subfigure=\count351 @@ -1084,7 +1084,7 @@ Package microtype Info: Using protrusion set `basicmath'. Package microtype Info: No adjustment of tracking. Package microtype Info: No adjustment of spacing. Package microtype Info: No adjustment of kerning. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-LatinModernRoman.cfg +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-LatinModernRoman.cfg File: mt-LatinModernRoman.cfg 2021/02/21 v1.1 microtype config. 
file: Latin Modern Roman (RS) ) Package hyperref Info: Link coloring ON on input line 221. @@ -1105,7 +1105,7 @@ LaTeX Font Info: Font shape `TU/latinmodern-math.otf(1)/m/n' will be LaTeX Font Info: Font shape `TU/latinmodern-math.otf(1)/m/n' will be (Font) scaled to size 7.0pt on input line 223. LaTeX Font Info: Trying to load font information for OML+lmm on input line 223. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/lm/omllmm.fd +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/lm/omllmm.fd File: omllmm.fd 2015/05/01 v1.6.1 Font defs for Latin Modern ) LaTeX Font Info: Font shape `TU/latinmodern-math.otf(2)/m/n' will be @@ -1121,15 +1121,15 @@ LaTeX Font Info: Font shape `TU/latinmodern-math.otf(3)/m/n' will be LaTeX Font Info: Font shape `TU/latinmodern-math.otf(3)/m/n' will be (Font) scaled to size 6.99925pt on input line 223. LaTeX Font Info: Trying to load font information for U+msa on input line 223. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsa.fd +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-msa.cfg +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-msa.cfg File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) ) LaTeX Font Info: Trying to load font information for U+msb on input line 223. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsb.fd +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsb.fd File: umsb.fd 2013/01/14 v3.01 AMS symbols B -) (/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-msb.cfg +) (/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-msb.cfg File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) ) [1 @@ -1162,7 +1162,7 @@ Class scrreprt Warning: \float@addtolists detected! 
(scrreprt) a package that still implements the (scrreprt) deprecated \float@addtolist interface. -(/Users/Ishani/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-TU-empty.cfg +(/Users/lillianweng/Library/TinyTeX/texmf-dist/tex/latex/microtype/mt-TU-empty.cfg File: mt-TU-empty.cfg 2021/06/22 v1.1 microtype config. file: fonts with nonstandard glyph set (RS) ) LaTeX Font Info: Font shape `TU/latinmodern-math.otf(1)/m/n' will be @@ -1203,8 +1203,8 @@ LaTeX Font Info: Font shape `TU/lmtt/bx/n' in size <14.4> not available LaTeX Font Info: Font shape `TU/lmtt/bx/n' in size <10.95> not available (Font) Font shape `TU/lmtt/b/n' tried instead on input line 700. -Overfull \hbox (122.36606pt too wide) in paragraph at lines 743--743 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 743--743 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] [10] @@ -1213,8 +1213,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 745--745 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 792--792 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 792--792 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1223,8 +1223,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 794--794 [] [11] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 827--827 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 827--827 +[]\TU/lmtt/m/n/10.95 
/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1233,8 +1233,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 829--829 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 881--881 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 881--881 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1243,8 +1243,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 883--883 [] [12] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 922--922 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 922--922 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1257,8 +1257,8 @@ LaTeX Font Info: Font shape `TU/lmtt/bx/n' in size <12> not available (Font) Font shape `TU/lmtt/b/n' tried instead on input line 983. Missing character: There is no   (U+2003) in font [lmroman10-regular]:mapping=tex-text;! 
-Overfull \hbox (122.36606pt too wide) in paragraph at lines 1041--1041 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1041--1041 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] [14] @@ -1275,8 +1275,8 @@ Overfull \hbox (53.61371pt too wide) in paragraph at lines 1046--1234 Overfull \vbox (1913.30832pt too high) has occurred while \output is active [] [16] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1260--1260 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1260--1260 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1285,8 +1285,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1262--1262 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1288--1288 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1288--1288 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1295,8 +1295,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1290--1290 [] [17] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1324--1324 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1324--1324 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1305,8 +1305,8 
@@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1326--1326 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1351--1351 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1351--1351 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1315,8 +1315,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1353--1353 [] [18] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1397--1397 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1397--1397 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1325,8 +1325,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1399--1399 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1419--1419 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1419--1419 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1335,8 +1335,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1421--1421 [] [19] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1447--1447 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1447--1447 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1345,8 +1345,8 
@@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 1449--1449 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1479--1479 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1479--1479 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1363,8 +1363,8 @@ Overfull \hbox (25.18869pt too wide) in paragraph at lines 1484--1673 Overfull \vbox (1926.90831pt too high) has occurred while \output is active [] [21] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 1688--1688 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 1688--1688 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1386,8 +1386,8 @@ Overfull \hbox (36.13486pt too wide) in paragraph at lines 1946--1946 [] [24] [25] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2028--2028 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2028--2028 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1401,8 +1401,8 @@ Overfull \hbox (24.53052pt too wide) in paragraph at lines 2033--2044 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2056--2056 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2056--2056 +[]\TU/lmtt/m/n/10.95 
/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1411,8 +1411,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2058--2058 [] [26] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2126--2126 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2126--2126 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1421,8 +1421,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2128--2128 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2154--2154 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2154--2154 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1431,8 +1431,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2156--2156 [] [27] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2181--2181 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2181--2181 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1446,8 +1446,8 @@ Overfull \hbox (24.53052pt too wide) in paragraph at lines 2186--2196 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2206--2206 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2206--2206 +[]\TU/lmtt/m/n/10.95 
/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1459,8 +1459,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2208--2208 Overfull \vbox (1913.30832pt too high) has occurred while \output is active [] [29] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2417--2417 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2417--2417 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1469,8 +1469,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2419--2419 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2442--2442 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2442--2442 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1484,8 +1484,8 @@ Overfull \hbox (24.53052pt too wide) in paragraph at lines 2447--2457 [] [30] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2521--2521 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2521--2521 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1494,8 +1494,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2523--2523 [] [31] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2552--2552 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox 
(151.10982pt too wide) in paragraph at lines 2552--2552 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1504,8 +1504,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2554--2554 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2578--2578 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2578--2578 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1517,8 +1517,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 2580--2580 Overfull \vbox (1913.30832pt too high) has occurred while \output is active [] [33] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2826--2826 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2826--2826 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1532,8 +1532,8 @@ Overfull \hbox (24.53052pt too wide) in paragraph at lines 2831--2841 [] [34] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 2854--2854 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 2854--2854 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1552,8 +1552,8 @@ chapter 3. 
[40 ] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3334--3334 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3334--3334 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1562,8 +1562,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3336--3336 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3381--3381 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3381--3381 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1572,8 +1572,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3383--3383 [] [41] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3407--3407 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3407--3407 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1582,8 +1582,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3409--3409 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3443--3443 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3443--3443 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1592,8 +1592,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3445--3445 [] 
[42] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3513--3513 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3513--3513 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1607,8 +1607,8 @@ Package longtable Warning: Column widths have changed (longtable) in table 3.1 on input line 3545. -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3562--3562 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3562--3562 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1617,8 +1617,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3564--3564 [] [44] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3600--3600 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3600--3600 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1627,8 +1627,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3602--3602 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3634--3634 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3634--3634 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1637,8 +1637,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph 
at lines 3636--3636 [] [45] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3686--3686 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3686--3686 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1647,8 +1647,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3688--3688 [] [46] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3734--3734 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3734--3734 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1657,8 +1657,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3736--3736 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3777--3777 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3777--3777 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1667,8 +1667,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3779--3779 [] [47] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3808--3808 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3808--3808 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1677,8 +1677,8 @@ Overfull \hbox (1214.62836pt too wide) in 
paragraph at lines 3810--3810 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3837--3837 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3837--3837 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1687,8 +1687,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3839--3839 [] [48] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3870--3870 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3870--3870 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1697,8 +1697,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3872--3872 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 3908--3908 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 3908--3908 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1707,8 +1707,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 3910--3910 [] [49] [50] [51] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4065--4065 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4065--4065 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1717,8 +1717,8 @@ Overfull \hbox (1214.62836pt too 
wide) in paragraph at lines 4067--4067 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4095--4095 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4095--4095 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1732,8 +1732,8 @@ Underfull \hbox (badness 1845) in paragraph at lines 4121--4124 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4133--4133 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4133--4133 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1742,8 +1742,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4135--4135 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4157--4157 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4157--4157 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1752,8 +1752,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4159--4159 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4182--4182 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4182--4182 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1762,8 +1762,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph 
at lines 4184--4184 [] [53] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4221--4221 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4221--4221 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1772,8 +1772,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4223--4223 [] [54] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4278--4278 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4278--4278 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1786,8 +1786,8 @@ chapter 4. [57 ] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4398--4398 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4398--4398 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1796,8 +1796,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4400--4400 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4439--4439 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4439--4439 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] [58] @@ -1806,8 +1806,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4441--4441 [] -Overfull \hbox (122.36606pt 
too wide) in paragraph at lines 4468--4468 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4468--4468 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1816,8 +1816,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4470--4470 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4497--4497 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4497--4497 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1826,8 +1826,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4499--4499 [] [59] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4529--4529 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4529--4529 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1836,8 +1836,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4531--4531 [] [60] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4571--4571 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4571--4571 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1846,8 +1846,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4573--4573 [] -Overfull \hbox (122.36606pt 
too wide) in paragraph at lines 4600--4600 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4600--4600 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1856,8 +1856,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4602--4602 [] [61] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4675--4675 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4675--4675 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1868,8 +1868,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4677--4677 File: pandas_3/images/agg.png Graphic file (type bmp) [62] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4722--4722 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4722--4722 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1878,8 +1878,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4724--4724 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4747--4747 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4747--4747 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1888,8 +1888,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at 
lines 4749--4749 [] [63] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4773--4773 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4773--4773 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1898,8 +1898,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4775--4775 [] [64] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4821--4821 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4821--4821 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1908,8 +1908,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4823--4823 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4847--4847 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4847--4847 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1918,8 +1918,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4849--4849 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4877--4877 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4877--4877 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1928,8 +1928,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at 
lines 4879--4879 [] [65] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4938--4938 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4938--4938 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1941,8 +1941,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4940--4940 File: pandas_3/images/first.png Graphic file (type bmp) -Overfull \hbox (122.36606pt too wide) in paragraph at lines 4980--4980 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 4980--4980 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1951,8 +1951,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 4982--4982 [] [67] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 5018--5018 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 5018--5018 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1961,8 +1961,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 5020--5020 [] [68] [69] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 5146--5146 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 5146--5146 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1976,8 
+1976,8 @@ Overfull \hbox (68.74403pt too wide) in paragraph at lines 5174--5179 [] [70] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 5201--5201 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 5201--5201 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -1986,8 +1986,8 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 5203--5203 [] -Overfull \hbox (122.36606pt too wide) in paragraph at lines 19010--19010 -[]\TU/lmtt/m/n/10.95 /Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning:[] +Overfull \hbox (151.10982pt too wide) in paragraph at lines 19010--19010 +[]\TU/lmtt/m/n/10.95 /Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning:[] [] @@ -2001,10 +2001,10 @@ Overfull \hbox (1214.62836pt too wide) in paragraph at lines 19012--19012 l.19055 \end{Shaded} Here is how much of TeX's memory you used: - 39295 strings out of 476758 - 805632 string characters out of 5801188 + 39295 strings out of 476779 + 807712 string characters out of 5801537 5000000 words of memory out of 5000000 - 60754 multiletter control sequences out of 15000+600000 + 60733 multiletter control sequences out of 15000+600000 564564 words of font info for 94 fonts, out of 8000000 for 9000 14 hyphenation exceptions out of 8191 108i,8n,121p,10900b,1030s stack positions out of 10000i,1000n,20000p,200000b,200000s diff --git a/index.pdf b/index.pdf index 9c65adef8..1ea410439 100644 Binary files a/index.pdf and b/index.pdf differ diff --git a/index.tex b/index.tex index d8ddc909d..b226ee97b 100644 --- a/index.tex +++ b/index.tex @@ -220,7 +220,7 @@ \begin{document} \maketitle -\ifdefined\Shaded\renewenvironment{Shaded}{\begin{tcolorbox}[enhanced, sharp corners, 
boxrule=0pt, breakable, interior hidden, frame hidden, borderline west={3pt}{0pt}{shadecolor}]}{\end{tcolorbox}}\fi +\ifdefined\Shaded\renewenvironment{Shaded}{\begin{tcolorbox}[interior hidden, boxrule=0pt, breakable, enhanced, frame hidden, borderline west={3pt}{0pt}{shadecolor}, sharp corners]}{\end{tcolorbox}}\fi \renewcommand*\contentsname{Table of contents} { @@ -262,7 +262,7 @@ \section*{About the Course Notes}\label{about-the-course-notes}} \hypertarget{introduction}{% \chapter{Introduction}\label{introduction}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -316,7 +316,7 @@ \chapter{Introduction}\label{introduction}} allowing you to take data and produce useful insights on the world's most challenging and ambiguous problems. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Course Goals}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Course Goals}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -334,7 +334,7 @@ \chapter{Introduction}\label{introduction}} \end{tcolorbox} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Some Topics We'll Cover}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Some Topics We'll Cover}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -364,7 +364,7 @@ \chapter{Introduction}\label{introduction}} \end{tcolorbox} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, 
bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Prerequisites}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Prerequisites}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] To ensure that you can get the most out of the course content, please make sure that you are familiar with: @@ -602,7 +602,7 @@ \section{Conclusion}\label{conclusion}} \hypertarget{pandas-i}{% \chapter{Pandas I}\label{pandas-i}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -740,7 +740,7 @@ \subsection{Series}\label{series}} \end{Shaded} \begin{verbatim} 
-/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -789,7 +789,7 @@ \subsection{Series}\label{series}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -824,7 +824,7 @@ \subsection{Series}\label{series}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -878,7 +878,7 @@ \subsubsection{\texorpdfstring{Selection in \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -919,7 +919,7 @@ \subsubsection{\texorpdfstring{Selection in \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1038,7 +1038,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -1257,7 +1257,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1285,7 +1285,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1321,7 +1321,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -1348,7 +1348,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1394,7 +1394,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1416,7 +1416,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -1444,7 +1444,7 @@ \subsubsection{\texorpdfstring{Creating a \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1476,7 +1476,7 @@ \subsection{Indices}\label{indices}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -1685,7 +1685,7 @@ \subsection{Indices}\label{indices}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -2025,7 +2025,7 @@ \subsection{\texorpdfstring{Extracting data with \texttt{.head} and \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2053,7 +2053,7 @@ \subsection{\texorpdfstring{Extracting data with \texttt{.head} and \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2123,7 +2123,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -2151,7 +2151,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2178,7 +2178,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2203,7 +2203,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -2414,7 +2414,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2439,7 +2439,7 @@ \subsection{\texorpdfstring{Label-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2518,7 +2518,7 @@ \subsection{\texorpdfstring{Integer-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -2549,7 +2549,7 @@ \subsection{\texorpdfstring{Integer-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2575,7 +2575,7 @@ \subsection{\texorpdfstring{Integer-based Extraction: Indexing with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -2823,7 +2823,7 @@ \subsubsection{A slice of row numbers}\label{a-slice-of-row-numbers}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -2851,7 +2851,7 @@ \subsubsection{A list of column labels}\label{a-list-of-column-labels}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3276,7 +3276,7 @@ \section{Parting Note}\label{parting-note}} \hypertarget{pandas-ii}{% \chapter{Pandas II}\label{pandas-ii}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -3331,7 +3331,7 @@ \chapter{Pandas II}\label{pandas-ii}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is 
expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3378,7 +3378,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3404,7 +3404,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3440,7 +3440,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. 
The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3510,7 +3510,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3559,7 +3559,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3597,7 +3597,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3631,7 +3631,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3683,7 +3683,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3731,7 +3731,7 @@ \section{Conditional Selection}\label{conditional-selection}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -3774,7 +3774,7 @@ \section{Adding, Removing, and Modifying \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3805,7 +3805,7 @@ \section{Adding, Removing, and Modifying \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3834,7 +3834,7 @@ \section{Adding, Removing, and Modifying \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -3867,7 +3867,7 @@ \section{Adding, Removing, and Modifying \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -3905,7 +3905,7 @@ \section{Adding, Removing, and Modifying \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4062,7 +4062,7 @@ \subsection{\texorpdfstring{\texttt{.describe()}}{.describe()}}\label{describe}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4092,7 +4092,7 @@ \subsection{\texorpdfstring{\texttt{.describe()}}{.describe()}}\label{describe}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4130,16 +4130,16 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} \begin{tabular}{lllrlr} \toprule -{} & State & Sex & Year & Name & Count \\ +{} & State & Sex & Year & Name & Count \\ \midrule -374406 & CA & M & 2011 & Kristofer & 10 \\ +56345 & CA & F & 1967 & Caron & 7 \\ \bottomrule \end{tabular} @@ -4154,7 +4154,7 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4163,11 +4163,11 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}} \toprule {} & Year & Name & Count \\ \midrule -396148 & 2019 & Leon & 313 \\ -350349 & 2003 & Colton & 135 \\ -207158 & 2014 & Cheyanne & 17 \\ -125299 & 1993 & Leigh & 17 \\ -92807 & 1983 & Marbella & 10 \\ +6109 & 1923 & Beryl & 9 \\ +141133 & 1997 & August & 6 \\ +156119 & 2001 & Arianah & 5 \\ +163929 & 2003 & Briyana & 5 \\ +72958 & 1975 & Sherilyn & 8 \\ \bottomrule \end{tabular} @@ -4179,19 +4179,19 @@ \subsection{\texorpdfstring{\texttt{.sample()}}{.sample()}}\label{sample}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} \begin{tabular}{lrlr} \toprule -{} & Year & Name & Count \\ +{} & Year & Name & Count \\ \midrule -344771 & 2000 & Jamaal & 5 \\ -343070 & 2000 & Ernie & 34 \\ -150439 & 2000 & Aya & 18 \\ -342518 & 2000 & Evan & 737 \\ +344709 & 2000 & Eithan & 5 \\ +343830 & 2000 & Jerrick & 10 \\ +342959 & 2000 & Maximiliano & 48 \\ +151581 & 2000 & Meliza & 8 \\ \bottomrule \end{tabular} @@ -4218,7 +4218,7 @@ \subsection{\texorpdfstring{\texttt{.value\_counts()}}{.value\_counts()}}\label{ \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4275,7 +4275,7 @@ \subsection{\texorpdfstring{\texttt{.sort\_values()}}{.sort\_values()}}\label{so \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4334,7 +4334,7 @@ \section{Parting Note}\label{parting-note-1}} \hypertarget{pandas-iii}{% \chapter{Pandas III}\label{pandas-iii}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -4395,7 +4395,7 @@ \section{Custom Sorts}\label{custom-sorts}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4436,7 +4436,7 @@ \subsection{Approach 1: Create a Temporary \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4465,7 +4465,7 @@ \subsection{Approach 1: Create a Temporary \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4494,7 +4494,7 @@ \subsection{Approach 1: Create a Temporary \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4526,7 +4526,7 @@ \subsection{\texorpdfstring{Approach 2: Sorting using the \texttt{key} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4568,7 +4568,7 @@ \subsection{\texorpdfstring{Approach 3: Sorting using the \texttt{map} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4597,7 +4597,7 @@ \subsection{\texorpdfstring{Approach 3: Sorting using the \texttt{map} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4637,7 +4637,7 @@ \section{\texorpdfstring{Aggregating Data with \end{Shaded} \begin{verbatim} - + \end{verbatim} What does this strange output mean? Calling \texttt{.groupby} @@ -4672,7 +4672,7 @@ \section{\texorpdfstring{Aggregating Data with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4719,7 +4719,7 @@ \section{\texorpdfstring{Aggregating Data with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4744,7 +4744,7 @@ \section{\texorpdfstring{Aggregating Data with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4770,7 +4770,7 @@ \section{\texorpdfstring{Aggregating Data with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4818,7 +4818,7 @@ \subsection{Aggregation Functions}\label{aggregation-functions}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4844,7 +4844,7 @@ \subsection{Aggregation Functions}\label{aggregation-functions}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -4874,7 +4874,7 @@ \subsection{Aggregation Functions}\label{aggregation-functions}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4935,7 +4935,7 @@ \subsection{Aggregation Functions}\label{aggregation-functions}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -4977,7 +4977,7 @@ \subsection{Aggregation Functions}\label{aggregation-functions}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -5015,7 +5015,7 @@ \subsection{Plotting Birth Counts}\label{plotting-birth-counts}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -5143,7 +5143,7 @@ \subsection{\texorpdfstring{Revisiting the \texttt{.agg()} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -5198,7 +5198,7 @@ \subsection{Renaming Columns After \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19007,7 +19007,7 @@ \subsection{Some Data Science Payoff}\label{some-data-science-payoff}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19070,7 +19070,7 @@ \subsection{Some Data Science Payoff}\label{some-data-science-payoff}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19105,7 +19105,7 @@ \section{\texorpdfstring{\texttt{.groupby()}, \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19160,7 +19160,7 @@ \subsection{\texorpdfstring{Raw \texttt{GroupBy} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19231,7 +19231,7 @@ \subsection{\texorpdfstring{Other \texttt{GroupBy} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19262,7 +19262,7 @@ \subsection{\texorpdfstring{Other \texttt{GroupBy} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19285,7 +19285,7 @@ \subsection{\texorpdfstring{Other \texttt{GroupBy} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19314,7 +19314,7 @@ \subsection{\texorpdfstring{Other \texttt{GroupBy} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19416,7 +19416,7 @@ \subsection{Filtering by Group}\label{filtering-by-group}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19485,7 +19485,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19555,7 +19555,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19582,7 +19582,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19634,7 +19634,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19660,7 +19660,7 @@ \subsection{\texorpdfstring{Aggregation with \texttt{lambda} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19699,7 +19699,7 @@ \section{Aggregating Data with Pivot \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19725,7 +19725,7 @@ \section{Aggregating Data with Pivot \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19784,7 +19784,7 @@ \section{Aggregating Data with Pivot \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19845,7 +19845,7 @@ \section{Aggregating Data with Pivot \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19891,7 +19891,7 @@ \section{Joining Tables}\label{joining-tables}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19926,7 +19926,7 @@ \section{Joining Tables}\label{joining-tables}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -19952,7 +19952,7 @@ \section{Joining Tables}\label{joining-tables}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -19988,7 +19988,7 @@ \section{Joining Tables}\label{joining-tables}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -20061,7 +20061,7 @@ \chapter{Data Cleaning and EDA}\label{data-cleaning-and-eda}} \end{Highlighting} \end{Shaded} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -20334,7 +20334,7 @@ \subsubsection{JSON}\label{json}} \end{Shaded} \begin{verbatim} -Using cached version that was downloaded (UTC): Mon Mar 18 21:13:08 2024 +Using cached version that was downloaded (UTC): Fri Aug 25 09:57:25 2023 \end{verbatim} \begin{verbatim} @@ -20398,7 +20398,7 @@ \subsubsection{JSON}\label{json}} \end{Shaded} \begin{verbatim} --rw-r--r-- 1 
Ishani staff 114K Mar 18 21:13 data/confirmed-cases.json +-rw-r--r-- 1 lillianweng staff 114K Aug 25 2023 data/confirmed-cases.json \end{verbatim} \begin{verbatim} @@ -22184,15 +22184,9 @@ \subsection{\texorpdfstring{Understanding Missing Value 1: \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{eda/eda_files/figure-pdf/cell-67-output-2.pdf} +{\centering \includegraphics{eda/eda_files/figure-pdf/cell-67-output-1.pdf} } @@ -22261,15 +22255,9 @@ \subsection{\texorpdfstring{Understanding Missing Value 2: \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{eda/eda_files/figure-pdf/cell-69-output-2.pdf} +{\centering \includegraphics{eda/eda_files/figure-pdf/cell-69-output-1.pdf} } @@ -22623,7 +22611,7 @@ \subsection{EDA and Data Wrangling}\label{eda-and-data-wrangling}} \hypertarget{regular-expressions}{% \chapter{Regular Expressions}\label{regular-expressions}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning 
Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -22811,9 +22799,8 @@ \subsection{Canonicalization}\label{canonicalization}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{lll} @@ -22828,9 +22815,8 @@ \subsection{Canonicalization}\label{canonicalization}} \end{tabular} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{llr} @@ -22893,9 +22879,8 @@ \subsubsection{Canonicalization with Python String \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{llll} @@ -22910,9 +22895,8 @@ \subsubsection{Canonicalization with Python String \end{tabular} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{llrl} @@ -22960,17 +22944,12 @@ \subsubsection{Canonicalization with Pandas Series \end{Shaded} \begin{verbatim} -/var/folders/7t/zbwy02ts2m7cn64fvwjqb8xw0000gp/T/ipykernel_59511/2523629438.py:3: FutureWarning: - -The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True. - -/var/folders/7t/zbwy02ts2m7cn64fvwjqb8xw0000gp/T/ipykernel_59511/2523629438.py:3: FutureWarning: - -The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True. - -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/var/folders/sy/b85yc0p951zdr__z5hvdmbjm0000gn/T/ipykernel_58221/2523629438.py:7: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True. + .str.replace('.', '') +/var/folders/sy/b85yc0p951zdr__z5hvdmbjm0000gn/T/ipykernel_58221/2523629438.py:7: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True. 
+ .str.replace('.', '') +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{llrll} @@ -22985,9 +22964,8 @@ \subsubsection{Canonicalization with Pandas Series \end{tabular} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{lllll} @@ -23594,9 +23572,8 @@ \subsubsection{\texorpdfstring{Canonicalization with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{ll} @@ -23676,9 +23653,8 @@ \subsubsection{\texorpdfstring{Extraction with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{ll} @@ -23727,9 +23703,8 @@ \subsubsection{\texorpdfstring{Extraction with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{llll} @@ -23750,9 +23725,8 @@ \subsubsection{\texorpdfstring{Extraction with \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+ return method() \end{verbatim} \begin{tabular}{lllll} @@ -23911,7 +23885,7 @@ \section{Limitations of Regular \hypertarget{visualization-i}{% \chapter{Visualization I}\label{visualization-i}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -24087,7 +24061,7 @@ \section{Qualitative Variables: Bar \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -24231,7 +24205,7 @@ \section{Distributions of Quantitative \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -24749,7 +24723,7 @@ \subsubsection{Evaluating Histograms}\label{evaluating-histograms}} \hypertarget{visualization-ii}{% \chapter{Visualization II}\label{visualization-ii}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -24810,7 +24784,7 @@ \subsection{KDE Theory}\label{kde-theory}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions 
`DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -24839,15 +24813,9 @@ \subsection{KDE Theory}\label{kde-theory}} \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-3-output-2.pdf} +{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-3-output-1.pdf} } @@ -25328,15 +25296,9 @@ \section{\texorpdfstring{Diving Deeper into \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-11-output-2.pdf} +{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-11-output-1.pdf} } @@ -25354,15 +25316,9 @@ \section{\texorpdfstring{Diving Deeper into \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-12-output-2.pdf} +{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-12-output-1.pdf} } @@ -25380,15 +25336,9 @@ \section{\texorpdfstring{Diving Deeper into \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has 
changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-13-output-2.pdf} +{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-13-output-1.pdf} } @@ -25544,15 +25494,9 @@ \subsubsection{\texorpdfstring{\texttt{lmplot} and \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-17-output-2.pdf} +{\centering \includegraphics{visualization_2/visualization_2_files/figure-pdf/cell-17-output-1.pdf} } @@ -25886,8 +25830,8 @@ \subsection{Linearization and Applying \end{Shaded} \begin{verbatim} -The slope, m, of the transformed data is: 336400693.43172693 -The intercept, b, of the transformed data is: -1802204836.0479977 +The slope, m, of the transformed data is: 336400693.43172705 +The intercept, b, of the transformed data is: -1802204836.0479987 \end{verbatim} \begin{figure}[H] @@ -26295,7 +26239,7 @@ \subsection{Harnessing Context}\label{harnessing-context}} \hypertarget{sampling}{% \chapter{Sampling}\label{sampling}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, 
title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -26744,9 +26688,8 @@ \subsection{Demo: Barbie v. \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{lrllr} @@ -26839,9 +26782,8 @@ \subsubsection{Check for Bias}\label{check-for-bias}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{lrlr} @@ -26904,7 +26846,7 @@ \subsubsection{Simple Random Sample}\label{simple-random-sample}} \end{Shaded} \begin{verbatim} -0.5302451891506862 +0.5287927522843883 \end{verbatim} This is very close to the actual vote of 0.5302792307692308! @@ -26928,8 +26870,8 @@ \subsubsection{Simple Random Sample}\label{simple-random-sample}} \end{Highlighting} \end{Shaded} -\textbf{Actual} = 0.5303, \textbf{Sample} = 0.4788, \textbf{Err} = -9.72\%. +\textbf{Actual} = 0.5303, \textbf{Sample} = 0.5112, \textbf{Err} = +3.59\%. We'll learn how to choose this number when we (re)learn the Central Limit Theorem later in the semester. @@ -26979,7 +26921,7 @@ \subsubsection{Quantifying Chance \end{Shaded} \begin{verbatim} -0.954 +0.95 \end{verbatim} You can see the curve looks roughly Gaussian/normal. Using KDE: @@ -27012,7 +26954,7 @@ \section{Summary}\label{summary-1}} \hypertarget{introduction-to-modeling}{% \chapter{Introduction to Modeling}\label{introduction-to-modeling}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} 
\tightlist @@ -27375,7 +27317,7 @@ \subsection{Derivation}\label{derivation}} \(\hat{a} = \text{average of }y - \text{slope}\cdot\text{average of }x\) \end{itemize} -\begin{tcolorbox}[enhanced jigsaw, opacityback=0, left=2mm, breakable, colback=white, arc=.35mm, leftrule=.75mm, toprule=.15mm, bottomrule=.15mm, rightrule=.15mm] +\begin{tcolorbox}[enhanced jigsaw, leftrule=.75mm, colback=white, arc=.35mm, rightrule=.15mm, toprule=.15mm, opacityback=0, breakable, bottomrule=.15mm, left=2mm] Proof: @@ -27727,7 +27669,7 @@ \section{Fitting the Model}\label{fitting-the-model}} \chapter{Constant Model, Loss, and Transformations}\label{constant-model-loss-and-transformations}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -29065,7 +29007,7 @@ \section{Bonus: Calculating Constant Model MSE Using an Algebraic \hypertarget{ordinary-least-squares}{% \chapter{Ordinary Least Squares}\label{ordinary-least-squares}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, 
title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -29136,7 +29078,7 @@ \subsection{Multiple Linear \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. \end{verbatim} @@ -29178,7 +29120,7 @@ \subsection{Multiple Linear \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: +/Users/lillianweng/anaconda3/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
\end{verbatim} @@ -29239,7 +29181,7 @@ \subsection{Multiple Linear \hypertarget{linear-algebra-approach}{% \subsection{Linear Algebra Approach}\label{linear-algebra-approach}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Vector Dot Product}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Vector Dot Product}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] The \textbf{dot product (or inner product)} is a vector operation that: @@ -29337,7 +29279,7 @@ \subsection{Linear Algebra Approach}\label{linear-algebra-approach}} \(\mathbb{Y}\) is also a vector with \(n\) elements (\(\mathbb{Y} \in \mathbb{R}^{n}\)). 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Linearity}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra Review: Linearity}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] An expression is \textbf{linear in \(\theta\)} (a set of parameters) if it is a linear combination of the elements of the set. Checking if an @@ -29382,7 +29324,7 @@ \subsection{Mean Squared Error}\label{mean-squared-error}} indication of how ``far away'' the predictions are from the true values, on average. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: L2 Norm}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: L2 Norm}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] When working with vectors, this idea of ``distance'' or the vector's \textbf{size/length} is represented by the \textbf{norm}. More @@ -29530,7 +29472,7 @@ \subsection{A Note on Terminology for Multiple Linear \hypertarget{geometric-derivation}{% \section{Geometric Derivation}\label{geometric-derivation}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Span}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Span}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, 
breakable, bottomtitle=1mm, left=2mm] Recall that the \textbf{span} or \textbf{column space} of a matrix \(\mathbb{X}\) (denoted \(span(\mathbb{X})\)) is the set of all possible @@ -29542,7 +29484,7 @@ \section{Geometric Derivation}\label{geometric-derivation}} \end{tcolorbox} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Matrix-Vector Multiplication}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Matrix-Vector Multiplication}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] There are 2 ways we can think about matrix-vector multiplication @@ -29631,7 +29573,7 @@ \section{Geometric Derivation}\label{geometric-derivation}} visualize this as the vector created by dropping a perpendicular line from \(\mathbb{Y}\) onto the span of \(\mathbb{X}\). 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Orthogonality}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Linear Algebra: Orthogonality}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Recall that two vectors \(\vec{a}\) and \(\vec{b}\) are orthogonal if their dot product is zero: \(\vec{a}^{T}\vec{b} = 0\). 
@@ -29754,7 +29696,7 @@ \section{OLS Properties}\label{ols-properties}} \[\mathbb{X}^Te = 0 \] -\begin{tcolorbox}[enhanced jigsaw, opacityback=0, left=2mm, breakable, colback=white, arc=.35mm, leftrule=.75mm, toprule=.15mm, bottomrule=.15mm, rightrule=.15mm] +\begin{tcolorbox}[enhanced jigsaw, leftrule=.75mm, colback=white, arc=.35mm, rightrule=.15mm, toprule=.15mm, opacityback=0, breakable, bottomrule=.15mm, left=2mm] Proof: @@ -29792,7 +29734,7 @@ \section{OLS Properties}\label{ols-properties}} \[\sum_i^n e_i = 0\] -\begin{tcolorbox}[enhanced jigsaw, opacityback=0, left=2mm, breakable, colback=white, arc=.35mm, leftrule=.75mm, toprule=.15mm, bottomrule=.15mm, rightrule=.15mm] +\begin{tcolorbox}[enhanced jigsaw, leftrule=.75mm, colback=white, arc=.35mm, rightrule=.15mm, toprule=.15mm, opacityback=0, breakable, bottomrule=.15mm, left=2mm] Proof: @@ -29858,7 +29800,7 @@ \section{Bonus: Uniqueness of the The Least Squares estimate \(\hat{\theta}\) is \textbf{unique} if and only if \(\mathbb{X}\) is \textbf{full column rank}. 
-\begin{tcolorbox}[enhanced jigsaw, opacityback=0, left=2mm, breakable, colback=white, arc=.35mm, leftrule=.75mm, toprule=.15mm, bottomrule=.15mm, rightrule=.15mm] +\begin{tcolorbox}[enhanced jigsaw, leftrule=.75mm, colback=white, arc=.35mm, rightrule=.15mm, toprule=.15mm, opacityback=0, breakable, bottomrule=.15mm, left=2mm] Proof: @@ -29948,7 +29890,7 @@ \section{Bonus: Uniqueness of the \chapter{sklearn and Gradient Descent}\label{sklearn-and-gradient-descent}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -30314,7 +30256,7 @@ \subsection{\texorpdfstring{The \texttt{sklearn} \end{Shaded} \begin{verbatim} -The RMSE of the model is 0.9881331104079044 +The RMSE of the model is 0.9881331104079045 \end{verbatim} We can also see that we obtain the same predictions using @@ -31134,7 +31076,7 @@ \subsection{Batch Gradient Descent and Stochastic Gradient \hypertarget{feature-engineering}{% \chapter{Feature Engineering}\label{feature-engineering}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, 
title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -31675,7 +31617,7 @@ \section{Polynomial Features}\label{polynomial-features}} \end{Shaded} \begin{verbatim} -MSE of model with (hp^2) feature: 18.984768907617223 +MSE of model with (hp^2) feature: 18.984768907617216 \end{verbatim} \begin{figure}[H] @@ -31731,7 +31673,7 @@ \section{Complexity and Overfitting}\label{complexity-and-overfitting}} we can improve model performance by designing increasingly complex models. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Fact: Polynomial Degrees}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Math Fact: Polynomial Degrees}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Given \(N\) overlapping data points, we can always find a polynomial of degree \(N-1\) that goes through all those points. @@ -31827,7 +31769,7 @@ \chapter{Case Study in Human Contexts and understand the material. The course notes will have the same broader structure but are by no means comprehensive. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32052,7 +31994,7 @@ \section{The Response: Cook County Open Data \subsection{1. Question/Problem Formulation}\label{questionproblem-formulation}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32092,7 +32034,7 @@ \subsection{1. Question/Problem comfortable. 
We can determine some metrics of success and frame a social problem as a data science problem. -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Definitions: Fairness and Transparency}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Definitions: Fairness and Transparency}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] The definitions, as given by the Cook County Assessor's Office, are given below: @@ -32185,7 +32127,7 @@ \subsection{1. Question/Problem \subsection{2. 
Data Acquisition and Cleaning}\label{data-acquisition-and-cleaning}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32262,7 +32204,7 @@ \subsection{2. Data Acquisition and \subsection{3. 
Exploratory Data Analysis}\label{exploratory-data-analysis}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32308,7 +32250,7 @@ \subsection{3. Exploratory Data \subsection{4. Prediction and Inference}\label{prediction-and-inference}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32370,7 +32312,7 @@ \subsection{4. 
Prediction and \hypertarget{results-and-conclusions}{% \subsection{5. Results and Conclusions}\label{results-and-conclusions}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Driving Questions}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32516,7 +32458,7 @@ \section{Key Takeaways}\label{key-takeaways}} \chapter{Cross Validation and Regularization}\label{cross-validation-and-regularization}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, 
bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -32682,7 +32624,7 @@ \subsubsection{Test Sets}\label{test-sets}} \begin{verbatim} Training error: 17.85851684101209 -Test error: 23.192405629701074 +Test error: 23.192405629820122 \end{verbatim} \hypertarget{validation-sets}{% @@ -33314,7 +33256,7 @@ \section{Regression Summary}\label{regression-summary}} \hypertarget{random-variables}{% \chapter{Random Variables}\label{random-variables}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -33360,7 +33302,7 @@ \chapter{Random Variables}\label{random-variables}} implications for modeling, but if you want to go a step further, take Data 140, CS 70, and/or EECS 126. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Data 8 Recap}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Data 8 Recap}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Recall the following concepts from Data 8: @@ -33719,7 +33661,7 @@ \subsection{Variance}\label{variance}} calculations, as \(\mathbb{E}[X^2] = \text{Var}(X)\) if \(X\) is centered and \(E(X)=0\). 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \[\begin{align} \text{Var}(X) &= \mathbb{E}[(X-\mathbb{E}[X])^2] \\ @@ -33747,7 +33689,7 @@ \subsection{Example: Die}\label{example-die}} 0, \text{otherwise} \end{cases}\] -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-caution-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-caution-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{What's the expectation, \(\mathbb{E}[X]?\)}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-caution-color!10!white, colframe=quarto-callout-caution-color-frame, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{What's the expectation, \(\mathbb{E}[X]?\)}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \[ \begin{align} \mathbb{E}[X] &= 1\big(\frac{1}{6}\big) + 
2\big(\frac{1}{6}\big) + 3\big(\frac{1}{6}\big) + 4\big(\frac{1}{6}\big) + 5\big(\frac{1}{6}\big) + 6\big(\frac{1}{6}\big) \\ @@ -33757,7 +33699,7 @@ \subsection{Example: Die}\label{example-die}} \end{tcolorbox} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-caution-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-caution-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{What's the variance, \(\text{Var}(X)?\)}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-caution-color!10!white, colframe=quarto-callout-caution-color-frame, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{What's the variance, \(\text{Var}(X)?\)}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Using Approach 1 (definition): \[\begin{align} \text{Var}(X) &= \big(\frac{1}{6}\big)((1 - \frac{7}{2})^2 + (2 - \frac{7}{2})^2 + (3 - \frac{7}{2})^2 + (4 - \frac{7}{2})^2 + (5 - \frac{7}{2})^2 + (6 - \frac{7}{2})^2) \\ @@ -33820,7 +33762,7 @@ \subsection{Properties of Expectation}\label{properties-of-expectation}} \[\mathbb{E}[aX+b] = aE[\mathbb{X}] + b\] -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, 
toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \[\begin{align} \mathbb{E}[aX+b] &= \sum_{x} (ax + b) P(X=x) \\ @@ -33841,7 +33783,7 @@ \subsection{Properties of Expectation}\label{properties-of-expectation}} \[\mathbb{E}[X+Y] = \mathbb{E}[X] + \mathbb{E}[Y]\] -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \[\begin{align} \mathbb{E}[X+Y] &= \sum_{s} (X+Y)(s) P(s) \\ @@ -33892,7 +33834,7 @@ \subsection{Properties of Variance}\label{properties-of-variance}} \(X\) by \(b\) units. 
\end{itemize} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] We know that \[\mathbb{E}[aX+b] = aE[\mathbb{X}] + b\] @@ -33931,7 +33873,7 @@ \subsection{Properties of Variance}\label{properties-of-variance}} \[\text{Var}(X + Y) = \text{Var}(X) + \text{Var}(Y) \qquad \text{if } X, Y \text{ independent}\] \end{enumerate} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Proof}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] The variance of a sum is affected by the dependence between 
the two random variables that are being added. Let's expand the definition of @@ -34103,7 +34045,7 @@ \subsection{Summary}\label{summary-2}} \chapter{Estimators, Bias, and Variance}\label{estimators-bias-and-variance}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -34255,7 +34197,7 @@ \subsection{Example}\label{example}} C. 
\(Y_C = 20 * X_1\) -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-caution-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-caution-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{Solution}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-caution-color!10!white, colframe=quarto-callout-caution-color-frame, title=\textcolor{quarto-callout-caution-color}{\faFire}\hspace{0.5em}{Solution}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Let \(X_1, X_2, ... X_{20}\) be 20 i.i.d Bernoulli(0.5) random variables. Since the \(X_i\)'s are independent, @@ -34416,7 +34358,7 @@ \subsection{Using the Sample Mean to Estimate the Population \textbf{unbiased estimator} of the population mean and will explore this idea more in the next lecture. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Data 8 Recap: Square Root Law}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Data 8 Recap: Square Root Law}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] The square root law (\href{https://inferentialthinking.com/chapters/14/5/Variability_of_the_Sample_Mean.html\#the-square-root-law}{Data @@ -34539,7 +34481,7 @@ \subsubsection{Estimating a Linear modeled by \[Y = g(x) + \epsilon\] \[ f_{\theta}(x) = Y = \theta_0 + \sum_{j=1}^p \theta_j x_j + \epsilon\] -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-warning-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-warning-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Which expressions are random?}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-warning-color!10!white, colframe=quarto-callout-warning-color-frame, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Which expressions are random?}, 
coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] In our two equations above, the true relationship \(g(x) = \theta_0 + \sum_{j=1}^p \theta_j x_j\) is not random, but @@ -34554,7 +34496,7 @@ \subsubsection{Estimating a Linear use it to train a model and obtain an estimate of \(\hat{\theta}\) \[\hat{Y}(x) = f_{\hat{\theta}}(x) = \hat{\theta_0} + \sum_{j=1}^p \hat{\theta_j} x_j\] -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-warning-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-warning-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Which expressions are random?}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-warning-color!10!white, colframe=quarto-callout-warning-color-frame, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Which expressions are random?}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] In our estimating equation above, our sample \(\Bbb{X}\), \(\Bbb{Y}\) are random (often due to human error). Hence, the estimates we calculate @@ -34745,7 +34687,7 @@ \subsubsection{Model Bias}\label{model-bias}} \(g(x)\); if it's negative, our model tends to underestimate \(g(x)\). And if it's 0, we can say that our model is \textbf{unbiased}. 
-\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-tip-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-tip-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Unbiased Estimators}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-tip-color!10!white, colframe=quarto-callout-tip-color-frame, title=\textcolor{quarto-callout-tip-color}{\faLightbulb}\hspace{0.5em}{Unbiased Estimators}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] An \textbf{unbiased model} has a \(\text{model bias } = 0\). In other words, our model predicts \(g(x)\) on average. @@ -34842,7 +34784,7 @@ \section{{[}Bonus{]} Proof of Bias-Variance Decomposition in the Bias-Variance Tradeoff section above, and this content is out of scope. 
-\begin{tcolorbox}[enhanced jigsaw, opacityback=0, left=2mm, breakable, colback=white, arc=.35mm, leftrule=.75mm, toprule=.15mm, bottomrule=.15mm, rightrule=.15mm] +\begin{tcolorbox}[enhanced jigsaw, leftrule=.75mm, colback=white, arc=.35mm, rightrule=.15mm, toprule=.15mm, opacityback=0, breakable, bottomrule=.15mm, left=2mm] \textbf{Click to show}\vspace{2mm} @@ -34991,7 +34933,7 @@ \subsection{Step 4: Bias-Variance \chapter{Causal Inference and Confounding}\label{causal-inference-and-confounding}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -35120,7 +35062,7 @@ \section{Review: Bootstrap accurately model the variability of our estimates. 
\end{itemize} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-warning-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-warning-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Why must we resample \emph{with replacement}?}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-warning-color!10!white, colframe=quarto-callout-warning-color-frame, title=\textcolor{quarto-callout-warning-color}{\faExclamationTriangle}\hspace{0.5em}{Why must we resample \emph{with replacement}?}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] Given an original sample of size \(n\), we want a resample that has the same size \(n\) as the original. Sampling \emph{without} replacement @@ -35215,9 +35157,8 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. 
It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. + return method() \end{verbatim} \begin{tabular}{lrrrr} @@ -35288,9 +35229,8 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+ return method() \end{verbatim} \begin{tabular}{lr} @@ -35305,7 +35245,7 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{tabular} \begin{verbatim} -RMSE 0.04547085380275766 +RMSE 0.04547085380275768 \end{verbatim} Our single sample of data gives us the value of @@ -35369,7 +35309,7 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{Shaded} \begin{verbatim} -(-0.25864811956848754, 1.1034243854204049) +(-0.258648119568487, 1.1034243854204047) \end{verbatim} Our bootstrapped 95\% confidence interval for \(\theta_1\) is @@ -35418,9 +35358,8 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+ return method() \end{verbatim} \begin{tabular}{lrr} @@ -35468,15 +35407,9 @@ \subsection{Hypothesis Testing Through Bootstrap: Snowy Plover \end{Highlighting} \end{Shaded} -\begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: - -The figure layout has changed to tight -\end{verbatim} - \begin{figure}[H] -{\centering \includegraphics{inference_causality/inference_causality_files/figure-pdf/cell-6-output-2.pdf} +{\centering \includegraphics{inference_causality/inference_causality_files/figure-pdf/cell-6-output-1.pdf} } @@ -35539,9 +35472,8 @@ \subsection{A Simpler Model}\label{a-simpler-model}} \end{Shaded} \begin{verbatim} -/Users/Ishani/micromamba/lib/python3.9/site-packages/IPython/core/formatters.py:342: FutureWarning: - -In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. +/Users/lillianweng/anaconda3/envs/DSDP/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality. 
+ return method() \end{verbatim} \begin{tabular}{lr} @@ -35610,8 +35542,8 @@ \subsection{A Simpler Model}\label{a-simpler-model}} \end{Shaded} \begin{verbatim} -RMSE of Original Model: 0.04547085380275766 -RMSE of Interpretable Model: 0.046493941375556846 +RMSE of Original Model: 0.04547085380275768 +RMSE of Interpretable Model: 0.04649394137555684 \end{verbatim} Yet, the confidence interval for the true parameter \(\theta_{1}\) does @@ -35628,7 +35560,7 @@ \subsection{A Simpler Model}\label{a-simpler-model}} \end{Shaded} \begin{verbatim} -(0.6029335250209633, 0.8208401738546206) +(0.6029335250209632, 0.8208401738546206) \end{verbatim} In retrospect, it's no surprise that the weight of an egg best predicts @@ -35784,7 +35716,7 @@ \subsection{How to perform causal \hypertarget{sql-i}{% \chapter{SQL I}\label{sql-i}} -\begin{tcolorbox}[enhanced jigsaw, colframe=quarto-callout-note-color-frame, bottomtitle=1mm, colbacktitle=quarto-callout-note-color!10!white, coltitle=black, titlerule=0mm, rightrule=.15mm, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, left=2mm, breakable, opacitybacktitle=0.6, toptitle=1mm, colback=white, bottomrule=.15mm, toprule=.15mm, leftrule=.75mm, arc=.35mm] +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] \begin{itemize} \tightlist @@ -35928,7 +35860,8 @@ \section{Intro to SQL}\label{intro-to-sql}} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. 
\end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -35938,11 +35871,6 @@ \section{Intro to SQL}\label{intro-to-sql}} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 & 10 \\ -drogon & 2011 & -100 \\ -dragon 2 & 2019 & 0 \\ -puff & 2010 & 100 \\ -smaug & 2011 & None \\ \end{longtable} Thanks to the \texttt{pandas} magic, the resulting return data is @@ -35973,7 +35901,8 @@ \section{Tables and Schema}\label{tables-and-schema}} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}lllll@{}} @@ -35983,15 +35912,6 @@ \section{Tables and Schema}\label{tables-and-schema}} \endhead \bottomrule\noalign{} \endlastfoot -table & dish & dish & 0 & CREATE TABLE dish("name" VARCHAR PRIMARY KEY, -"type" VARCHAR, "cost" INTEGER, CHECK(("cost" \textgreater= 0))); \\ -table & dragon & dragon & 0 & CREATE TABLE dragon("name" VARCHAR PRIMARY -KEY, "year" INTEGER, cute INTEGER, CHECK(("year" \textgreater= -2000))); \\ -table & scene & scene & 0 & CREATE TABLE scene(id INTEGER PRIMARY KEY, -biome VARCHAR NOT NULL, city VARCHAR NOT NULL, visitors INTEGER, -created\_at TIMESTAMP DEFAULT(current\_date()), CHECK((visitors -\textgreater= 0))); \\ \end{longtable} The summary above displays information about the database; it contains @@ -36144,7 +36064,8 @@ \subsection{\texorpdfstring{\texttt{SELECT}ing From \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. 
\end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36154,11 +36075,6 @@ \subsection{\texorpdfstring{\texttt{SELECT}ing From \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 & 10 \\ -drogon & 2011 & -100 \\ -dragon 2 & 2019 & 0 \\ -puff & 2010 & 100 \\ -smaug & 2011 & None \\ \end{longtable} In SQL, \texttt{*} means ``everything.'' The query above grabs @@ -36176,7 +36092,8 @@ \subsection{\texorpdfstring{\texttt{SELECT}ing From \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36186,11 +36103,6 @@ \subsection{\texorpdfstring{\texttt{SELECT}ing From \endhead \bottomrule\noalign{} \endlastfoot -10 & 2010 \\ --100 & 2011 \\ -0 & 2019 \\ -100 & 2010 \\ -None & 2011 \\ \end{longtable} \textbf{Every} SQL query must include both a \texttt{SELECT} and @@ -36254,7 +36166,8 @@ \subsubsection{\texorpdfstring{Aliasing with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36264,11 +36177,6 @@ \subsubsection{\texorpdfstring{Aliasing with \endhead \bottomrule\noalign{} \endlastfoot -10 & 2010 \\ --100 & 2011 \\ -0 & 2019 \\ -100 & 2010 \\ -None & 2011 \\ \end{longtable} \hypertarget{uniqueness-with-distinct}{% @@ -36289,7 +36197,8 @@ \subsubsection{\texorpdfstring{Uniqueness with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}l@{}} @@ -36299,9 +36208,6 @@ \subsubsection{\texorpdfstring{Uniqueness with \endhead \bottomrule\noalign{} \endlastfoot -2019 \\ -2010 \\ -2011 \\ \end{longtable} \hypertarget{applying-where-conditions}{% @@ -36321,7 +36227,8 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. 
\end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36331,8 +36238,6 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 \\ -puff & 2010 \\ \end{longtable} We can add complexity to the \texttt{WHERE} condition using the keywords @@ -36349,7 +36254,8 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36359,9 +36265,6 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 \\ -puff & 2010 \\ -dragon 2 & 2019 \\ \end{longtable} To spare ourselves needing to write complicated logical expressions by @@ -36379,7 +36282,8 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36389,8 +36293,6 @@ \subsection{\texorpdfstring{Applying \texttt{WHERE} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 \\ -puff & 2010 \\ \end{longtable} \hypertarget{strings-in-sql}{% @@ -36434,7 +36336,8 @@ \subsubsection{\texorpdfstring{\texttt{WHERE} WITH \texttt{NULL} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36444,10 +36347,6 @@ \subsubsection{\texorpdfstring{\texttt{WHERE} WITH \texttt{NULL} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 10 \\ -drogon & -100 \\ -dragon 2 & 0 \\ -puff & 100 \\ \end{longtable} \hypertarget{sorting-and-restricting-output}{% @@ -36472,7 +36371,8 @@ \subsubsection{\texorpdfstring{Sorting with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. 
\end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36482,11 +36382,6 @@ \subsubsection{\texorpdfstring{Sorting with \endhead \bottomrule\noalign{} \endlastfoot -drogon & 2011 & -100 \\ -dragon 2 & 2019 & 0 \\ -hiccup & 2010 & 10 \\ -puff & 2010 & 100 \\ -smaug & 2011 & None \\ \end{longtable} By default, \texttt{ORDER\ BY} will display results in ascending order @@ -36504,7 +36399,8 @@ \subsubsection{\texorpdfstring{Sorting with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36514,11 +36410,6 @@ \subsubsection{\texorpdfstring{Sorting with \endhead \bottomrule\noalign{} \endlastfoot -puff & 2010 & 100 \\ -hiccup & 2010 & 10 \\ -dragon 2 & 2019 & 0 \\ -drogon & 2011 & -100 \\ -smaug & 2011 & None \\ \end{longtable} We can also tell SQL to \texttt{ORDER\ BY} two columns at once. This @@ -36535,7 +36426,8 @@ \subsubsection{\texorpdfstring{Sorting with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36545,11 +36437,6 @@ \subsubsection{\texorpdfstring{Sorting with \endhead \bottomrule\noalign{} \endlastfoot -puff & 2010 & 100 \\ -hiccup & 2010 & 10 \\ -drogon & 2011 & -100 \\ -smaug & 2011 & None \\ -dragon 2 & 2019 & 0 \\ \end{longtable} Note that in this example, \texttt{year} is sorted in ascending order @@ -36577,7 +36464,8 @@ \subsubsection{\texorpdfstring{\texttt{LIMIT} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. 
\end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36587,8 +36475,6 @@ \subsubsection{\texorpdfstring{\texttt{LIMIT} \endhead \bottomrule\noalign{} \endlastfoot -hiccup & 2010 & 10 \\ -drogon & 2011 & -100 \\ \end{longtable} The \texttt{OFFSET} keyword indicates the index at which \texttt{LIMIT} @@ -36608,7 +36494,8 @@ \subsubsection{\texorpdfstring{\texttt{LIMIT} \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/example_duck.db +Done. \end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36618,10 +36505,94 @@ \subsubsection{\texorpdfstring{\texttt{LIMIT} \endhead \bottomrule\noalign{} \endlastfoot -drogon & 2011 & -100 \\ -dragon 2 & 2019 & 0 \\ \end{longtable} +With these keywords in hand, let's update our SQL order of operations. +Remember: \emph{every} SQL query must list clauses in this order. + +\begin{verbatim} +SELECT <column expression list> +FROM <table> +[WHERE <predicate>] +[ORDER BY <column list>] +[LIMIT <number of rows>] +[OFFSET <number of rows>]; +\end{verbatim} + +\hypertarget{summary-3}{% +\section{Summary}\label{summary-3}} + +Let's summarize what we've learned so far. We know that \texttt{SELECT} +and \texttt{FROM} are the fundamental building blocks of any SQL query. +We can augment these two keywords with additional clauses to refine the +data in our output table. + +Any clauses that we include must follow a strict ordering within the +query: + +\begin{verbatim} +SELECT <column expression list> +FROM <table>
+[WHERE <predicate>] +[ORDER BY <column list>] +[LIMIT <number of rows>] +[OFFSET <number of rows>] +\end{verbatim} + +Here, any clause contained in square brackets \texttt{{[}\ {]}} is +optional --- we only need to use the keyword if it is relevant to the +table operation we want to perform. Also note that by convention, we use +all caps for keywords in SQL statements and use newlines to make code +more readable. + +\bookmarksetup{startatroot} + +\hypertarget{sql-ii}{% +\chapter{SQL II}\label{sql-ii}} + +\begin{tcolorbox}[enhanced jigsaw, toptitle=1mm, leftrule=.75mm, rightrule=.15mm, titlerule=0mm, toprule=.15mm, opacitybacktitle=0.6, colbacktitle=quarto-callout-note-color!10!white, colframe=quarto-callout-note-color-frame, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Learning Outcomes}, coltitle=black, colback=white, arc=.35mm, bottomrule=.15mm, opacityback=0, breakable, bottomtitle=1mm, left=2mm] + +\begin{itemize} +\tightlist +\item + Perform aggregations using \texttt{GROUP\ BY} +\item + Introduce the ability to filter groups +\item + Perform data cleaning and text manipulation in SQL +\item + Join data across tables +\end{itemize} + +\end{tcolorbox} + +In this lecture, we'll continue our work from last time to introduce +some advanced SQL syntax. + +First, let's load in the \texttt{basic\_examples.db} database. 
+ +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# Load the SQL Alchemy Python library and DuckDB} +\ImportTok{import}\NormalTok{ sqlalchemy} +\ImportTok{import}\NormalTok{ duckdb} +\end{Highlighting} +\end{Shaded} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# Load \%\%sql cell magic} +\OperatorTok{\%}\NormalTok{load\_ext sql} +\end{Highlighting} +\end{Shaded} + +\begin{Shaded} +\begin{Highlighting}[] +\CommentTok{\# Connect to the database} +\OperatorTok{\%}\NormalTok{sql duckdb:}\OperatorTok{///}\NormalTok{data}\OperatorTok{/}\NormalTok{basic\_examples.db }\OperatorTok{{-}{-}}\NormalTok{alias basic} +\end{Highlighting} +\end{Shaded} + \hypertarget{aggregating-with-group-by}{% \section{\texorpdfstring{Aggregating with \texttt{GROUP\ BY}}{Aggregating with GROUP BY}}\label{aggregating-with-group-by}} @@ -36640,7 +36611,7 @@ \section{\texorpdfstring{Aggregating with \texttt{pandas} implementation. To illustrate \texttt{GROUP\ BY}, we will consider the \texttt{Dish} -table from the \texttt{basic\_examples.db} database. +table from our database. \begin{Shaded} \begin{Highlighting}[] @@ -36651,7 +36622,8 @@ \section{\texorpdfstring{Aggregating with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/basic_examples.db +Done. \end{verbatim} \begin{longtable}[]{@{}lll@{}} @@ -36661,17 +36633,11 @@ \section{\texorpdfstring{Aggregating with \endhead \bottomrule\noalign{} \endlastfoot -ravioli & entree & 10 \\ -ramen & entree & 13 \\ -taco & entree & 7 \\ -edamame & appetizer & 4 \\ -fries & appetizer & 4 \\ -potsticker & appetizer & 4 \\ -ice cream & dessert & 5 \\ \end{longtable} -Say we wanted to find the total costs of dishes of a certain -\texttt{type}. To accomplish this, we would write the following code. +Notice that there are multiple dishes of the same \texttt{type}. What if +we wanted to find the total costs of dishes of a certain \texttt{type}? +To accomplish this, we would write the following code. 
\begin{Shaded} \begin{Highlighting}[] @@ -36683,7 +36649,8 @@ \section{\texorpdfstring{Aggregating with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/basic_examples.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36693,9 +36660,6 @@ \section{\texorpdfstring{Aggregating with \endhead \bottomrule\noalign{} \endlastfoot -appetizer & 12 \\ -entree & 30 \\ -dessert & 5 \\ \end{longtable} What is going on here? The statement \texttt{GROUP\ BY\ type} tells SQL @@ -36750,7 +36714,8 @@ \section{\texorpdfstring{Aggregating with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/basic_examples.db +Done. \end{verbatim} \begin{longtable}[]{@{}llll@{}} @@ -36760,9 +36725,6 @@ \section{\texorpdfstring{Aggregating with \endhead \bottomrule\noalign{} \endlastfoot -entree & 30 & 7 & taco \\ -dessert & 5 & 5 & ice cream \\ -appetizer & 12 & 4 & potsticker \\ \end{longtable} To count the number of rows associated with each group, we use the @@ -36770,6 +36732,32 @@ \section{\texorpdfstring{Aggregating with number of rows in each group, including rows with null values. Its \texttt{pandas} equivalent is \texttt{.groupby().size()}. +Recall the \texttt{Dragon} table from the previous lecture: + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT }\OperatorTok{*}\NormalTok{ FROM Dragon}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +Done. +\end{verbatim} + +\begin{longtable}[]{@{}lll@{}} +\toprule\noalign{} +name & year & cute \\ +\midrule\noalign{} +\endhead +\bottomrule\noalign{} +\endlastfoot +\end{longtable} + +Notice that \texttt{COUNT(*)} and \texttt{COUNT(cute)} result in +different outputs. 
+ \begin{Shaded} \begin{Highlighting}[] \OperatorTok{\%\%}\NormalTok{sql} @@ -36780,7 +36768,8 @@ \section{\texorpdfstring{Aggregating with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/basic_examples.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36790,15 +36779,8 @@ \section{\texorpdfstring{Aggregating with \endhead \bottomrule\noalign{} \endlastfoot -2010 & 2 \\ -2011 & 2 \\ -2019 & 1 \\ \end{longtable} -To exclude \texttt{NULL} values when counting the rows in each group, we -explicitly call \texttt{COUNT} on a column in the table. This is similar -to calling \texttt{.groupby().count()} in \texttt{pandas}. - \begin{Shaded} \begin{Highlighting}[] \OperatorTok{\%\%}\NormalTok{sql} @@ -36809,7 +36791,8 @@ \section{\texorpdfstring{Aggregating with \end{Shaded} \begin{verbatim} -Running query in 'duck' + * duckdb:///data/basic_examples.db +Done. \end{verbatim} \begin{longtable}[]{@{}ll@{}} @@ -36819,9 +36802,6 @@ \section{\texorpdfstring{Aggregating with \endhead \bottomrule\noalign{} \endlastfoot -2010 & 2 \\ -2011 & 1 \\ -2019 & 1 \\ \end{longtable} With this definition of \texttt{GROUP\ BY} in hand, let's update our SQL @@ -36842,32 +36822,615 @@ \section{\texorpdfstring{Aggregating with the selection process and that column expressions may include aggregation functions (\texttt{MAX}, \texttt{MIN}, etc.). -\hypertarget{summary-3}{% -\section{Summary}\label{summary-3}} +\hypertarget{filtering-groups}{% +\section{Filtering Groups}\label{filtering-groups}} -Let's summarize what we've learned so far. We know that \texttt{SELECT} -and \texttt{FROM} are the fundamental building blocks of any SQL query. -We can augment these two keywords with additional clauses to refine the -data in our output table. +Now, what if we only want groups that meet a certain condition? +\texttt{HAVING} filters groups by applying some condition across all +rows in each group. 
We interpret it as a way to keep only the groups +\texttt{HAVING} some condition. Note the difference between +\texttt{WHERE} and \texttt{HAVING}: we use \texttt{WHERE} to filter +rows, whereas we use \texttt{HAVING} to filter \emph{groups}. +\texttt{WHERE} precedes \texttt{HAVING} in terms of how SQL executes a +query. -Any clauses that we include must follow a strict ordering within the -query: +Let's take a look at the \texttt{Dish} table to see how we can use +\texttt{HAVING}. Say we want to group dishes with a cost greater than 4 +by \texttt{type} and only keep groups where the max cost is less than +10. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT }\BuiltInTok{type}\NormalTok{, COUNT(}\OperatorTok{*}\NormalTok{)} +\NormalTok{FROM Dish} +\NormalTok{WHERE cost }\OperatorTok{\textgreater{}} \DecValTok{4} +\NormalTok{GROUP BY }\BuiltInTok{type} +\NormalTok{HAVING MAX(cost) }\OperatorTok{\textless{}} \DecValTok{10}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} \begin{verbatim} -SELECT + * duckdb:///data/basic_examples.db +Done. +\end{verbatim} + +\begin{longtable}[]{@{}ll@{}} +\toprule\noalign{} +type & count\_star() \\ +\midrule\noalign{} +\endhead +\bottomrule\noalign{} +\endlastfoot +\end{longtable} + +Here, we first use \texttt{WHERE} to filter for rows with a cost greater +than 4. We then group our values by \texttt{type} before applying the +\texttt{HAVING} operator. With \texttt{HAVING}, we can filter our groups +based on if the max cost is less than 10. + +\hypertarget{summary-sql}{% +\section{Summary: SQL}\label{summary-sql}} + +With this definition of \texttt{GROUP\ BY} and \texttt{HAVING} in hand, +let's update our SQL order of operations. Remember: \emph{every} SQL +query must list clauses in this order. + +\begin{verbatim} +SELECT FROM
[WHERE ] [GROUP BY ] [ORDER BY ] [LIMIT ] -[OFFSET ] +[OFFSET ]; \end{verbatim} -Here, any clause contained in square brackets \texttt{{[}\ {]}} is -optional ------ we only need to use the keyword if it is relevant to the -table operation we want to perform. Also note that by convention, we use -all caps for keywords in SQL statements and use newlines to make code -more readable. +Note that we can use the \texttt{AS} keyword to rename columns during +the selection process and that column expressions may include +aggregation functions (\texttt{MAX}, \texttt{MIN}, etc.). + +\hypertarget{eda-in-sql}{% +\section{EDA in SQL}\label{eda-in-sql}} + +In the last lecture, we mostly worked under the assumption that our data +had already been cleaned. However, as we saw in our first pass through +the data science lifecycle, we're very unlikely to be given data that is +free of formatting issues. With this in mind, we'll want to learn how to +clean and transform data in SQL. + +Our typical workflow when working with ``big data'' is: + +\begin{enumerate} +\def\labelenumi{\arabic{enumi}.} +\tightlist +\item + Use SQL to query data from a database +\item + Use Python (with \texttt{pandas}) to analyze this data in detail +\end{enumerate} + +We can, however, still perform simple data cleaning and re-structuring +using SQL directly. To do so, we'll use the \texttt{Title} table from +the \texttt{imdb\_duck} database, which contains information about +movies and actors. + +Let's load in the \texttt{imdb\_duck} database. 
+ +\begin{Shaded} +\begin{Highlighting}[] +\ImportTok{import}\NormalTok{ os} +\ControlFlowTok{if}\NormalTok{ os.path.exists(}\StringTok{"/home/jovyan/shared/sql/imdb\_duck.db"}\NormalTok{):} +\NormalTok{ imdbpath }\OperatorTok{=} \StringTok{"duckdb:////home/jovyan/shared/sql/imdb\_duck.db"} +\ControlFlowTok{elif}\NormalTok{ os.path.exists(}\StringTok{"data/imdb\_duck.db"}\NormalTok{):} +\NormalTok{ imdbpath }\OperatorTok{=} \StringTok{"duckdb:///data/imdb\_duck.db"} +\ControlFlowTok{else}\NormalTok{:} + \ImportTok{import}\NormalTok{ gdown} +\NormalTok{ url }\OperatorTok{=} \StringTok{\textquotesingle{}https://drive.google.com/uc?id=10tKOHGLt9QoOgq5Ii{-}FhxpB9lDSQgl1O\textquotesingle{}} +\NormalTok{ output\_path }\OperatorTok{=} \StringTok{\textquotesingle{}data/imdb\_duck.db\textquotesingle{}} +\NormalTok{ gdown.download(url, output\_path, quiet}\OperatorTok{=}\VariableTok{False}\NormalTok{)} +\NormalTok{ imdbpath }\OperatorTok{=} \StringTok{"duckdb:///data/imdb\_duck.db"} +\BuiltInTok{print}\NormalTok{(imdbpath)} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} +duckdb:///data/imdb_duck.db +\end{verbatim} + +\begin{Shaded} +\begin{Highlighting}[] +\ImportTok{from}\NormalTok{ sqlalchemy }\ImportTok{import}\NormalTok{ create\_engine} +\NormalTok{imdb\_engine }\OperatorTok{=}\NormalTok{ create\_engine(imdbpath, connect\_args}\OperatorTok{=}\NormalTok{\{}\StringTok{\textquotesingle{}read\_only\textquotesingle{}}\NormalTok{: }\VariableTok{True}\NormalTok{\})} +\OperatorTok{\%}\NormalTok{sql imdb\_engine }\OperatorTok{{-}{-}}\NormalTok{alias imdb} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb_engine" +[SQL: imdb_engine] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +Since we'll be working with the \texttt{Title} table, let's take a quick +look at what it contains. 
+ +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql imdb } + +\NormalTok{SELECT }\OperatorTok{*} +\NormalTok{FROM Title} +\NormalTok{WHERE primaryTitle IN (}\StringTok{\textquotesingle{}Ginny \& Georgia\textquotesingle{}}\NormalTok{, }\StringTok{\textquotesingle{}What If...?\textquotesingle{}}\NormalTok{, }\StringTok{\textquotesingle{}Succession\textquotesingle{}}\NormalTok{, }\StringTok{\textquotesingle{}Veep\textquotesingle{}}\NormalTok{, }\StringTok{\textquotesingle{}Tenet\textquotesingle{}}\NormalTok{)} +\NormalTok{LIMIT }\DecValTok{10}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.ParserException) Parser Error: syntax error at or near "imdb" +[SQL: imdb + +SELECT * +FROM Title +WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet') +LIMIT 10;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +\hypertarget{matching-text-using-like}{% +\subsection{\texorpdfstring{Matching Text using +\texttt{LIKE}}{Matching Text using LIKE}}\label{matching-text-using-like}} + +One common task we encountered in our first look at EDA was needing to +match string data. For example, we might want to remove entries +beginning with the same prefix as part of the data cleaning process. + +In SQL, we use the \texttt{LIKE} operator to (you guessed it) look for +strings that are \emph{like} a given string pattern. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT titleType, primaryTitle} +\NormalTok{FROM Title} +\NormalTok{WHERE primaryTitle LIKE }\StringTok{\textquotesingle{}Star Wars: Episode I {-} The Phantom Menace\textquotesingle{}} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? 
+LINE 2: FROM Title + ^ +[SQL: SELECT titleType, primaryTitle +FROM Title +WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +What if we wanted to find \emph{all} Star Wars movies? \texttt{\%} is +the wildcard operator; it means ``look for any character, any number of +times''. This makes it helpful for identifying strings that are similar +to our desired pattern, even when we don't know the full text of what we +aim to extract. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT titleType, primaryTitle} +\NormalTok{FROM Title} +\NormalTok{WHERE primaryTitle LIKE }\StringTok{\textquotesingle{}\%Star Wars\%\textquotesingle{}} +\NormalTok{LIMIT }\DecValTok{10}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 2: FROM Title + ^ +[SQL: SELECT titleType, primaryTitle +FROM Title +WHERE primaryTitle LIKE '%Star Wars%' +LIMIT 10;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for +this. Note that here, we have to use the \texttt{SIMILAR\ TO} operator +rather than \texttt{LIKE}. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT titleType, primaryTitle} +\NormalTok{FROM Title} +\NormalTok{WHERE primaryTitle SIMILAR TO }\StringTok{\textquotesingle{}.*Star Wars*.\textquotesingle{}} +\NormalTok{LIMIT }\DecValTok{10}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? 
+LINE 2: FROM Title + ^ +[SQL: SELECT titleType, primaryTitle +FROM Title +WHERE primaryTitle SIMILAR TO '.*Star Wars*.' +LIMIT 10;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +\hypertarget{casting-data-types}{% +\subsection{\texorpdfstring{\texttt{CAST}ing Data +Types}{CASTing Data Types}}\label{casting-data-types}} + +A common data cleaning task is converting data to the correct variable +type. The \texttt{CAST} keyword is used to generate a new output column. +Each entry in this output column is the result of converting the data in +an existing column to a new data type. For example, we may wish to +convert numeric data stored as a string to an integer. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT primaryTitle, CAST(runtimeMinutes AS INT)} +\NormalTok{FROM Title}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 2: FROM Title; + ^ +[SQL: SELECT primaryTitle, CAST(runtimeMinutes AS INT) +FROM Title;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +We use \texttt{CAST} when \texttt{SELECT}ing colunns for our output +table. In the example above, we want to \texttt{SELECT} the columns of +integer year and runtime data that is created by the \texttt{CAST}. + +SQL will automatically name a new column according to the command used +to \texttt{SELECT} it, which can lead to unwieldy column names. We can +rename the \texttt{CAST}ed column using the \texttt{AS} keyword. 
+ +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year} +\NormalTok{FROM Title} +\NormalTok{LIMIT }\DecValTok{5}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 2: FROM Title + ^ +[SQL: SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year +FROM Title +LIMIT 5;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +\hypertarget{using-conditional-statements-with-case}{% +\subsection{\texorpdfstring{Using Conditional Statements with +\texttt{CASE}}{Using Conditional Statements with CASE}}\label{using-conditional-statements-with-case}} + +When working with \texttt{pandas}, we often ran into situations where we +wanted to generate new columns using some form of conditional statement. +For example, say we wanted to describe a film title as ``old,'' +``mid-aged,'' or ``new,'' depending on the year of its release. + +In SQL, conditional operations are performed using a \texttt{CASE} +clause. Conceptually, \texttt{CASE} behaves much like the \texttt{CAST} +operation: it creates a new column that we can then \texttt{SELECT} to +appear in the output. The syntax for a \texttt{CASE} clause is as +follows: + +\begin{verbatim} +CASE WHEN THEN + WHEN THEN + ... + ELSE + END +\end{verbatim} + +Scanning through the skeleton code above, you can see that the logic is +similar to that of an \texttt{if} statement in Python. The conditional +statement is first opened by calling \texttt{CASE}. Each new condition +is specified by \texttt{WHEN}, with \texttt{THEN} indicating what value +should be filled if the condition is met. 
\texttt{ELSE} specifies the +value that should be filled if no other conditions are met. Lastly, +\texttt{END} indicates the end of the conditional statement; once +\texttt{END} has been called, SQL will continue evaluating the query as +usual. + +Let's see this in action. In the example below, we give the new column +created by the \texttt{CASE} statement the name \texttt{movie\_age}. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\OperatorTok{/*}\NormalTok{ If a movie was filmed before }\DecValTok{1950}\NormalTok{, it }\KeywordTok{is} \StringTok{"old"} +\NormalTok{Otherwise, }\ControlFlowTok{if}\NormalTok{ a movie was filmed before }\DecValTok{2000}\NormalTok{, it }\KeywordTok{is} \StringTok{"mid{-}aged"} +\NormalTok{Else, a movie }\KeywordTok{is} \StringTok{"new"} \OperatorTok{*/} + +\NormalTok{SELECT titleType, startYear,} +\NormalTok{CASE WHEN startYear }\OperatorTok{\textless{}} \DecValTok{1950}\NormalTok{ THEN }\StringTok{\textquotesingle{}old\textquotesingle{}} +\NormalTok{ WHEN startYear }\OperatorTok{\textless{}} \DecValTok{2000}\NormalTok{ THEN }\StringTok{\textquotesingle{}mid{-}aged\textquotesingle{}} +\NormalTok{ ELSE }\StringTok{\textquotesingle{}new\textquotesingle{}} +\NormalTok{ END AS movie\_age} +\NormalTok{FROM Title}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? 
+LINE 10: FROM Title; + ^ +[SQL: /* If a movie was filmed before 1950, it is "old" +Otherwise, if a movie was filmed before 2000, it is "mid-aged" +Else, a movie is "new" */ + +SELECT titleType, startYear, +CASE WHEN startYear < 1950 THEN 'old' + WHEN startYear < 2000 THEN 'mid-aged' + ELSE 'new' + END AS movie_age +FROM Title;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +\hypertarget{joining-tables-1}{% +\section{\texorpdfstring{\texttt{JOIN}ing +Tables}{JOINing Tables}}\label{joining-tables-1}} + +At this point, we're well-versed in using SQL as a tool to clean, +manipulate, and transform data in a table. Notice that this sentence +referred to one \emph{table}, specifically. What happens if the data we +need is distributed across multiple tables? This is an important +consideration when using SQL ------ recall that we first introduced SQL +as a language to query from databases. Databases often store data in a +multidimensional structure. In other words, information is stored across +several tables, with each table containing a small subset of all the +data housed by the database. + +A common way of organizing a database is by using a \textbf{star +schema}. A star schema is composed of two types of tables. A +\textbf{fact table} is the central table of the database ------ it +contains the information needed to link entries across several +\textbf{dimension tables}, which contain more detailed information about +the data. + +Say we were working with a database about boba offerings in Berkeley. +The dimension tables of the database might contain information about tea +varieties and boba toppings. The fact table would be used to link this +information across the various dimension tables. + +If we explicitly mark the relationships between tables, we start to see +the star-like structure of the star schema. + +To join data across multiple tables, we'll use the (creatively named) +\texttt{JOIN} keyword. 
We'll make things easier for now by first +considering the simpler \texttt{cats} dataset, which consists of the +tables \texttt{s} and \texttt{t}. + +To perform a join, we amend the \texttt{FROM} clause. You can think of +this as saying, ``\texttt{SELECT} my data \texttt{FROM} tables that have +been \texttt{JOIN}ed together.'' + +Remember: SQL does not consider newlines or whitespace when interpreting +queries. The indentation given in the example below is to help improve +readability. If you wish, you can write code that does not follow this +formatting. + +\begin{verbatim} +SELECT +FROM table_1 + JOIN table_2 + ON key_1 = key_2; +\end{verbatim} + +We also need to specify what column from each table should be used to +determine matching entries. By defining these keys, we provide SQL with +the information it needs to pair rows of data together. + +The most commonly used type of SQL \texttt{JOIN} is the \textbf{inner +join}. It turns out you're already familiar with what an inner join +does, and how it works -- this is the type of join we've been using in +\texttt{pandas} all along! In an inner join, we combine every row in our +first table with its matching entry in the second table. If a row from +either table does not have a match in the other table, it is omitted +from the output. + +In a \textbf{cross join}, \emph{all} possible combinations of rows +appear in the output table, regardless of whether or not rows share a +matching key. Because all rows are joined, even if there is no matching +key, it is not necessary to specify what keys to consider in an +\texttt{ON} statement. A cross join is also known as a cartesian +product. + +Conceptually, we can interpret an inner join as a cross join, followed +by removing all rows that do not share a matching key. Notice that the +output of the inner join above contains all rows of the cross join +example that contain a single color across the entire row. 
+ +In a \textbf{left outer join}, \emph{all} rows in the left table are +kept in the output table. If a row in the right table shares a match +with the left table, this row will be kept; otherwise, the rows in the +right table are omitted from the output. We can fill in any missing +values with \texttt{NULL}. + +A \textbf{right outer join} keeps all rows in the right table. Rows in +the left table are only kept if they share a match in the right table. +Again, we can fill in any missing values with \texttt{NULL}. + +In a \textbf{full outer join}, all rows that have a match between the +two tables are joined together. If a row has no match in the second +table, then the values of the columns for that second table are filled +with \texttt{NULL}. In other words, a full outer join performs an inner +join \emph{while still keeping} rows that have no match in the other +table. This is best understood visually: + +We have kept the same output achieved using an inner join, with the +addition of partially null rows for entries in \texttt{s} and \texttt{t} +that had no match in the second table. + +\hypertarget{aliasing-in-joins}{% +\subsection{\texorpdfstring{Aliasing in +\texttt{JOIN}s}{Aliasing in JOINs}}\label{aliasing-in-joins}} + +When joining tables, we often create aliases for table names (similarly +to what we did with column names in the last lecture). We do this as it +is typically easier to refer to aliases, especially when we are working +with long table names. We can even reference columns using aliased table +names! + +Let's say we want to determine the average rating of various movies. +We'll need to \texttt{JOIN} the \texttt{Title} and \texttt{Rating} +tables and can create aliases for both tables. 
+ +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} + +\NormalTok{SELECT primaryTitle, averageRating} +\NormalTok{FROM Title AS T INNER JOIN Rating AS R} +\NormalTok{ON T.tconst }\OperatorTok{=}\NormalTok{ R.tconst}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 2: FROM Title AS T INNER JOIN Rating AS R + ^ +[SQL: SELECT primaryTitle, averageRating +FROM Title AS T INNER JOIN Rating AS R +ON T.tconst = R.tconst;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +Note that the \texttt{AS} is actually optional! We can create aliases +for our tables even without it, but we usually include it for clarity. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} + +\NormalTok{SELECT primaryTitle, averageRating} +\NormalTok{FROM Title T INNER JOIN Rating R} +\NormalTok{ON T.tconst }\OperatorTok{=}\NormalTok{ R.tconst}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 2: FROM Title T INNER JOIN Rating R + ^ +[SQL: SELECT primaryTitle, averageRating +FROM Title T INNER JOIN Rating R +ON T.tconst = R.tconst;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} + +\hypertarget{common-table-expressions}{% +\subsection{Common Table Expressions}\label{common-table-expressions}} + +For more sophisticated data problems, the queries can become very +complex. Common table expressions (CTEs) allow us to break down these +complex queries into more manageable parts. 
To do so, we create +temporary tables corresponding to different aspects of the problem and +then reference them in the final query: + +\begin{verbatim} +WITH +table_name1 AS ( + SELECT ... +), +table_name2 AS ( + SELECT ... +) +SELECT ... +FROM +table_name1, +table_name2, ... +\end{verbatim} + +Let's say we want to identify the top 10 action movies that are highly +rated (with an average rating greater than 7) and popular (having more +than 5000 votes), along with the primary actors who are the most +popular. We can use CTEs to break this query down into separate +problems. Initially, we can filter to find good action movies and +prolific actors separately. This way, in our final join, we only need to +change the order. + +\begin{Shaded} +\begin{Highlighting}[] +\OperatorTok{\%\%}\NormalTok{sql} +\NormalTok{WITH } +\NormalTok{good\_action\_movies AS (} +\NormalTok{ SELECT }\OperatorTok{*} +\NormalTok{ FROM Title T JOIN Rating R ON T.tconst }\OperatorTok{=}\NormalTok{ R.tconst } +\NormalTok{ WHERE genres LIKE }\StringTok{\textquotesingle{}\%Action\%\textquotesingle{}}\NormalTok{ AND averageRating }\OperatorTok{\textgreater{}} \DecValTok{7}\NormalTok{ AND numVotes }\OperatorTok{\textgreater{}} \DecValTok{5000} +\NormalTok{),} +\NormalTok{prolific\_actors AS (} +\NormalTok{ SELECT N.nconst, primaryName, COUNT(}\OperatorTok{*}\NormalTok{) }\ImportTok{as}\NormalTok{ numRoles} +\NormalTok{ FROM Name N JOIN Principal P ON N.nconst }\OperatorTok{=}\NormalTok{ P.nconst} +\NormalTok{ WHERE category }\OperatorTok{=} \StringTok{\textquotesingle{}actor\textquotesingle{}} +\NormalTok{ GROUP BY N.nconst, primaryName} +\NormalTok{)} +\NormalTok{SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating} +\NormalTok{FROM good\_action\_movies m, prolific\_actors a, principal p} +\NormalTok{WHERE p.tconst }\OperatorTok{=}\NormalTok{ m.tconst AND p.nconst }\OperatorTok{=}\NormalTok{ a.nconst} +\NormalTok{ORDER BY rating DESC, numRoles DESC} +\NormalTok{LIMIT 
}\DecValTok{10}\OperatorTok{;} +\end{Highlighting} +\end{Shaded} + +\begin{verbatim} + * duckdb:///data/basic_examples.db +(duckdb.duckdb.CatalogException) Catalog Error: Table with name Title does not exist! +Did you mean "temp.information_schema.tables"? +LINE 4: F... + ^ +[SQL: WITH +good_action_movies AS ( + SELECT * + FROM Title T JOIN Rating R ON T.tconst = R.tconst + WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000 +), +prolific_actors AS ( + SELECT N.nconst, primaryName, COUNT(*) as numRoles + FROM Name N JOIN Principal P ON N.nconst = P.nconst + WHERE category = 'actor' + GROUP BY N.nconst, primaryName +) +SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating +FROM good_action_movies m, prolific_actors a, principal p +WHERE p.tconst = m.tconst AND p.nconst = a.nconst +ORDER BY rating DESC, numRoles DESC +LIMIT 10;] +(Background on this error at: https://sqlalche.me/e/20/f405) +\end{verbatim} diff --git a/sql_II/data/basic_examples.db.wal b/sql_II/data/basic_examples.db.wal deleted file mode 100644 index e69de29bb..000000000 diff --git a/sql_II/sql_II.html b/sql_II/sql_II.html deleted file mode 100644 index d1266d154..000000000 --- a/sql_II/sql_II.html +++ /dev/null @@ -1,922 +0,0 @@ - - - - - - - - - -SQL II - - - - - - - - - - - - - - - - - - - -
- -
- -
-
-

SQL II

-
- - - -
- - - - -
- - - -
- - -
-
-
- -
-
-Learning Outcomes -
-
-
-
-
-
    -
  • Perform aggregations using GROUP BY
  • -
  • Introduce the ability to filter groups
  • -
  • Perform data cleaning and text manipulation in SQL
  • -
  • Join data across tables
  • -
-
-
-
-

In this lecture, we’ll continue our work from last time to introduce some advanced SQL syntax.

-

First, let’s load in the basic_examples.db database.

-
-
%load_ext sql
-
-
-
import duckdb
-conn = duckdb.connect()
-conn.query("INSTALL sqlite")
-
-
-
%sql duckdb:///data/basic_examples.db --alias basic
-
-
-

Aggregating with GROUP BY

-

At this point, we’ve seen that SQL offers much of the same functionality that was given to us by pandas. We can extract data from a table, filter it, and reorder it to suit our needs.

-

In pandas, much of our analysis work relied heavily on being able to use .groupby() to aggregate across the rows of our dataset. SQL’s answer to this task is the (very conveniently named) GROUP BY clause. While the outputs of GROUP BY are similar to those of .groupby() — in both cases, we obtain an output table where some column has been used for grouping — the syntax and logic used to group data in SQL are fairly different from the pandas implementation.

-

To illustrate GROUP BY, we will consider the Dish table from our database.

-
-
%%sql
-SELECT * 
-FROM Dish;
-
-

Notice that there are multiple dishes of the same type. What if we wanted to find the total costs of dishes of a certain type? To accomplish this, we would write the following code.

-
-
%%sql
-SELECT type, SUM(cost)
-FROM Dish
-GROUP BY type;
-
-

What is going on here? The statement GROUP BY type tells SQL to group the data based on the value contained in the type column (whether a record is an appetizer, entree, or dessert). SUM(cost) sums up the costs of dishes in each type and displays the result in the output table.

-

You may be wondering: why does SUM(cost) come before the command to GROUP BY type? Don’t we need to form groups before we can sum the costs within each? Remember that SQL is a declarative programming language — a SQL programmer simply states what end result they would like to see, and leaves the task of figuring out how to obtain this result to SQL itself. This means that SQL queries sometimes don’t follow what a reader sees as a “logical” sequence of thought. Instead, SQL requires that we follow its set order of operations when constructing queries. So long as we follow this order, SQL will handle the underlying logic.

-

In practical terms: our goal with this query was to output the total costs of each type. To communicate this to SQL, we say that we want to SELECT the SUMmed cost values for each type group.

-

There are many aggregation functions that can be used to aggregate the data contained in each group. Some common examples are:

-
    -
  • COUNT: count the number of rows associated with each group
  • -
  • MIN: find the minimum value of each group
  • -
  • MAX: find the maximum value of each group
  • -
  • SUM: sum across all records in each group
  • -
  • AVG: find the average value of each group
  • -
-

We can easily compute multiple aggregations all at once (a task that was very tricky in pandas).

-
-
%%sql
-SELECT type, SUM(cost), MIN(cost), MAX(name)
-FROM Dish
-GROUP BY type;
-
-

To count the number of rows associated with each group, we use the COUNT keyword. Calling COUNT(*) will compute the total number of rows in each group, including rows with null values. Its pandas equivalent is .groupby().size().

-

Recall the Dragon table from the previous lecture:

-
-
%%sql
-SELECT * FROM Dragon;
-
-

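Notice that COUNT(*) and COUNT(cute) result in different outputs: COUNT(*) counts every row in each group, whereas COUNT(cute) counts only the rows whose cute value is not NULL.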

-
-
%%sql
-SELECT year, COUNT(*)
-FROM Dragon
-GROUP BY year;
-
-
-
%%sql
-SELECT year, COUNT(cute)
-FROM Dragon
-GROUP BY year;
-
-

With this definition of GROUP BY in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.

-
SELECT <column expression list>
-FROM <table>
-[WHERE <predicate>]
-[GROUP BY <column list>]
-[ORDER BY <column list>]
-[LIMIT <number of rows>]
-[OFFSET <number of rows>];
-

Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).

-
-
-

Filtering Groups

-

Now, what if we only want groups that meet a certain condition? HAVING filters groups by applying some condition across all rows in each group. We interpret it as a way to keep only the groups HAVING some condition. Note the difference between WHERE and HAVING: we use WHERE to filter rows, whereas we use HAVING to filter groups. WHERE precedes HAVING in terms of how SQL executes a query.

-

Let’s take a look at the Dish table to see how we can use HAVING. Say we want to group dishes with a cost greater than 4 by type and only keep groups where the max cost is less than 10.

-
-
%%sql
-SELECT type, COUNT(*)
-FROM Dish
-WHERE cost > 4
-GROUP BY type
-HAVING MAX(cost) <  10;
-
-

Here, we first use WHERE to filter for rows with a cost greater than 4. We then group our values by type before applying the HAVING operator. With HAVING, we can filter our groups based on whether the max cost is less than 10.

-
-
-

Summary: SQL

-

With this definition of GROUP BY and HAVING in hand, let’s update our SQL order of operations. Remember: every SQL query must list clauses in this order.

-
SELECT <column expression list>
-FROM <table>
-[WHERE <predicate>]
-[GROUP BY <column list>]
-[HAVING <predicate>]
-[ORDER BY <column list>]
-[LIMIT <number of rows>]
-[OFFSET <number of rows>];
-

Note that we can use the AS keyword to rename columns during the selection process and that column expressions may include aggregation functions (MAX, MIN, etc.).

-
-
-

EDA in SQL

-

In the last lecture, we mostly worked under the assumption that our data had already been cleaned. However, as we saw in our first pass through the data science lifecycle, we’re very unlikely to be given data that is free of formatting issues. With this in mind, we’ll want to learn how to clean and transform data in SQL.

-

Our typical workflow when working with “big data” is:

-
    -
  1. Use SQL to query data from a database
  2. -
  3. Use Python (with pandas) to analyze this data in detail
  4. -
-

We can, however, still perform simple data cleaning and re-structuring using SQL directly. To do so, we’ll use the Title table from the imdb_duck database, which contains information about movies and actors.

-

Let’s load in the imdb_duck database.

-
-
import os
-if os.path.exists("/home/jovyan/shared/sql/imdb_duck.db"):
-    imdbpath = "duckdb:////home/jovyan/shared/sql/imdb_duck.db"
-elif os.path.exists("data/imdb_duck.db"):
-    imdbpath =  "duckdb:///data/imdb_duck.db"
-else:
-    import gdown
-    url = 'https://drive.google.com/uc?id=10tKOHGLt9QoOgq5Ii-FhxpB9lDSQgl1O'
-    output_path = 'data/imdb_duck.db'
-    gdown.download(url, output_path, quiet=False)
-    imdbpath = "duckdb:///data/imdb_duck.db"
-print(imdbpath)
-
-
-
from sqlalchemy import create_engine
-imdb_engine = create_engine(imdbpath, connect_args={'read_only': True})
-%sql imdb_engine --alias imdb
-
-

Since we’ll be working with the Title table, let’s take a quick look at what it contains.

-
-
%%sql imdb 
-    
-SELECT *
-FROM Title
-WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')
-LIMIT 10;
-
-
-

Matching Text using LIKE

-

One common task we encountered in our first look at EDA was needing to match string data. For example, we might want to remove entries beginning with the same prefix as part of the data cleaning process.

-

In SQL, we use the LIKE operator to (you guessed it) look for strings that are like a given string pattern.

-
-
%%sql
-SELECT titleType, primaryTitle
-FROM Title
-WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'
-
-

What if we wanted to find all Star Wars movies? % is the wildcard operator; it means “look for any character, any number of times” (including zero times). This makes it helpful for identifying strings that are similar to our desired pattern, even when we don’t know the full text of what we aim to extract.

-
-
%%sql
-SELECT titleType, primaryTitle
-FROM Title
-WHERE primaryTitle LIKE '%Star Wars%'
-LIMIT 10;
-
-

Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for this. Note that here, we have to use the SIMILAR TO operator rather than LIKE.

-
-
%%sql
-SELECT titleType, primaryTitle
-FROM Title
-WHERE primaryTitle SIMILAR TO '.*Star Wars.*'
-LIMIT 10;
-
-
-
-

CASTing Data Types

-

A common data cleaning task is converting data to the correct variable type. The CAST keyword is used to generate a new output column. Each entry in this output column is the result of converting the data in an existing column to a new data type. For example, we may wish to convert numeric data stored as a string to an integer.

-
-
%%sql
-SELECT primaryTitle, CAST(runtimeMinutes AS INT)
-FROM Title;
-
-

We use CAST when SELECTing columns for our output table. In the example above, we SELECT the column of integer runtime data that is created by the CAST.

-

SQL will automatically name a new column according to the command used to SELECT it, which can lead to unwieldy column names. We can rename the CASTed column using the AS keyword.

-
-
%%sql
-SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year
-FROM Title
-LIMIT 5;
-
-
-
-

Using Conditional Statements with CASE

-

When working with pandas, we often ran into situations where we wanted to generate new columns using some form of conditional statement. For example, say we wanted to describe a film title as “old,” “mid-aged,” or “new,” depending on the year of its release.

-

In SQL, conditional operations are performed using a CASE clause. Conceptually, CASE behaves much like the CAST operation: it creates a new column that we can then SELECT to appear in the output. The syntax for a CASE clause is as follows:

-
CASE WHEN <condition> THEN <value>
-     WHEN <other condition> THEN <other value>
-     ...
-     ELSE <yet another value>
-     END
-

Scanning through the skeleton code above, you can see that the logic is similar to that of an if statement in Python. The conditional statement is first opened by calling CASE. Each new condition is specified by WHEN, with THEN indicating what value should be filled if the condition is met. ELSE specifies the value that should be filled if no other conditions are met. Lastly, END indicates the end of the conditional statement; once END has been called, SQL will continue evaluating the query as usual.

-

Let’s see this in action. In the example below, we give the new column created by the CASE statement the name movie_age.

-
-
%%sql
-/* If a movie was filmed before 1950, it is "old"
-Otherwise, if a movie was filmed before 2000, it is "mid-aged"
-Else, a movie is "new" */
-
-SELECT titleType, startYear,
-CASE WHEN startYear < 1950 THEN 'old'
-     WHEN startYear < 2000 THEN 'mid-aged'
-     ELSE 'new'
-     END AS movie_age
-FROM Title;
-
-
-
-
-

JOINing Tables

-

At this point, we’re well-versed in using SQL as a tool to clean, manipulate, and transform data in a table. Notice that this sentence referred to one table, specifically. What happens if the data we need is distributed across multiple tables? This is an important consideration when using SQL —— recall that we first introduced SQL as a language to query from databases. Databases often store data in a multidimensional structure. In other words, information is stored across several tables, with each table containing a small subset of all the data housed by the database.

-

A common way of organizing a database is by using a star schema. A star schema is composed of two types of tables. A fact table is the central table of the database —— it contains the information needed to link entries across several dimension tables, which contain more detailed information about the data.

-

Say we were working with a database about boba offerings in Berkeley. The dimension tables of the database might contain information about tea varieties and boba toppings. The fact table would be used to link this information across the various dimension tables.

-
-

multidimensional

-
-

If we explicitly mark the relationships between tables, we start to see the star-like structure of the star schema.

-
-

star

-
-

To join data across multiple tables, we’ll use the (creatively named) JOIN keyword. We’ll make things easier for now by first considering the simpler cats dataset, which consists of the tables s and t.

-
-

cats

-
-

To perform a join, we amend the FROM clause. You can think of this as saying, “SELECT my data FROM tables that have been JOINed together.”

-

Remember: SQL does not consider newlines or whitespace when interpreting queries. The indentation given in the example below is to help improve readability. If you wish, you can write code that does not follow this formatting.

-
SELECT <column list>
-FROM table_1 
-    JOIN table_2 
-    ON key_1 = key_2;
-

We also need to specify what column from each table should be used to determine matching entries. By defining these keys, we provide SQL with the information it needs to pair rows of data together.

-

The most commonly used type of SQL JOIN is the inner join. It turns out you’re already familiar with what an inner join does, and how it works – this is the type of join we’ve been using in pandas all along! In an inner join, we combine every row in our first table with its matching entry in the second table. If a row from either table does not have a match in the other table, it is omitted from the output.

-
-

inner

-
-

In a cross join, all possible combinations of rows appear in the output table, regardless of whether or not rows share a matching key. Because all rows are joined, even if there is no matching key, it is not necessary to specify what keys to consider in an ON statement. A cross join is also known as a cartesian product.

-
-

cross

-
-

Conceptually, we can interpret an inner join as a cross join, followed by removing all rows that do not share a matching key. Notice that the output of the inner join above contains all rows of the cross join example that contain a single color across the entire row.

-

In a left outer join, all rows in the left table are kept in the output table. If a row in the right table shares a match with the left table, this row will be kept; otherwise, the rows in the right table are omitted from the output. For rows in the left table that have no match, the columns coming from the right table are filled with NULL.

-
-

left

-
-

A right outer join keeps all rows in the right table. Rows in the left table are only kept if they share a match in the right table. Again, for rows in the right table that have no match, the columns coming from the left table are filled with NULL.

-
-

right

-
-

In a full outer join, all rows that have a match between the two tables are joined together. If a row from either table has no match in the other table, it is still kept, and the columns coming from the other table are filled with NULL. In other words, a full outer join performs an inner join while still keeping rows that have no match in the other table. This is best understood visually:

-
-

full

-
-

We have kept the same output achieved using an inner join, with the addition of partially null rows for entries in s and t that had no match in the other table.

-
-

Aliasing in JOINs

-

When joining tables, we often create aliases for table names (similarly to what we did with column names in the last lecture). We do this as it is typically easier to refer to aliases, especially when we are working with long table names. We can even reference columns using aliased table names.

-

Let’s say we want to determine the average rating of various movies:

-
-
%%sql
-
-SELECT primaryTitle, averageRating
-FROM Title AS T INNER JOIN Rating AS R
-ON T.tconst = R.tconst;
-
-

Note that the AS is actually optional! We can create aliases for our tables even without it, but we usually include it for clarity.

-
-
%%sql
-
-SELECT primaryTitle, averageRating
-FROM Title T INNER JOIN Rating R
-ON T.tconst = R.tconst;
-
-
-
-

Common Table Expression

-

For more sophisticated data problems, the queries can become very complex. Common Table Expressions allow us to break down these complex queries into more manageable parts. This involves creating temporary tables which correspond to different aspects of the problem and then referencing them in the final query. The following format is an example of how we can create two temporary tables and then use them for further querying:

-
WITH 
-table_name1 AS ( 
-    SELECT ...
-),
-table_name2 AS ( 
-    SELECT ...
-)
-SELECT ... 
-FROM 
-table_name1, 
-table_name2, ...
-

Let’s say we want to identify the top 10 action movies that are highly rated (with an average rating greater than 7) and popular (having more than 5000 votes), along with the primary actors who are the most popular. We can use Common Table Expressions to break this query down into separate problems. Initially, we can filter to find good action movies and prolific actors separately. This way, the final query only needs to join these two temporary tables (through the Principal table) and order the results.

-
-
%%sql
-WITH 
-good_action_movies AS (
-    SELECT *
-    FROM Title T JOIN Rating R ON T.tconst = R.tconst  
-    WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000
-),
-prolific_actors AS (
-    SELECT N.nconst, primaryName, COUNT(*) as numRoles
-    FROM Name N JOIN Principal P ON N.nconst = P.nconst
-    WHERE category = 'actor'
-    GROUP BY N.nconst, primaryName
-)
-SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating
-FROM good_action_movies m, prolific_actors a, principal p
-WHERE p.tconst = m.tconst AND p.nconst = a.nconst
-ORDER BY rating DESC, numRoles DESC
-LIMIT 10;
-
-
-
- -
- - -
- - - - - \ No newline at end of file diff --git a/sql_II/sql_II.ipynb b/sql_II/sql_II.ipynb deleted file mode 100644 index d9a6a4828..000000000 --- a/sql_II/sql_II.ipynb +++ /dev/null @@ -1,771 +0,0 @@ -{ - "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "---\n", - "title: SQL II\n", - "execute:\n", - " echo: true\n", - "format:\n", - " html:\n", - " code-fold: false\n", - " code-tools: true\n", - " toc: true\n", - " toc-title: SQL II\n", - " page-layout: full\n", - " theme:\n", - " - cosmo\n", - " - cerulean\n", - " callout-icon: false\n", - "---" - ], - "id": "04e68af4" - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "::: {.callout-note collapse=\"false\"}\n", - "## Learning Outcomes\n", - "* Perform aggregations using `GROUP BY`\n", - "* Introduce the ability to filter groups\n", - "* Perform data cleaning and text manipulation in SQL\n", - "* Join data across tables\n", - ":::\n", - "\n", - "In this lecture, we'll continue our work from last time to introduce some advanced SQL syntax. 
\n", - "\n", - "First, let's load in the `basic_examples.db` database.\n" - ], - "id": "2cdf73bf" - }, - { - "cell_type": "code", - "metadata": {}, - "source": [ - "#| code-fold: true\n", - "# Load the SQL Alchemy Python library and DuckDB\n", - "import sqlalchemy\n", - "import duckdb" - ], - "id": "def2bb4b", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "# Load %%sql cell magic\n", - "%load_ext sql" - ], - "id": "c0cca068", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "# Connect to the database\n", - "%sql duckdb:///data/basic_examples.db --alias basic" - ], - "id": "6bc27dd5", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Aggregating with `GROUP BY`\n", - "\n", - "At this point, we've seen that SQL offers much of the same functionality that was given to us by `pandas`. We can extract data from a table, filter it, and reorder it to suit our needs.\n", - "\n", - "In `pandas`, much of our analysis work relied heavily on being able to use `.groupby()` to aggregate across the rows of our dataset. SQL's answer to this task is the (very conveniently named) `GROUP BY` clause. 
While the outputs of `GROUP BY` are similar to those of `.groupby()` —— in both cases, we obtain an output table where some column has been used for grouping —— the syntax and logic used to group data in SQL are fairly different to the `pandas` implementation.\n", - "\n", - "To illustrate `GROUP BY`, we will consider the `Dish` table from our database.\n" - ], - "id": "0b24c419" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT * \n", - "FROM Dish;" - ], - "id": "43189660", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice that there are multiple dishes of the same `type`. What if we wanted to find the total costs of dishes of a certain `type`? To accomplish this, we would write the following code.\n" - ], - "id": "75136168" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT type, SUM(cost)\n", - "FROM Dish\n", - "GROUP BY type;" - ], - "id": "dc0c1c08", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What is going on here? The statement `GROUP BY type` tells SQL to group the data based on the value contained in the `type` column (whether a record is an appetizer, entree, or dessert). `SUM(cost)` sums up the costs of dishes in each `type` and displays the result in the output table.\n", - "\n", - "You may be wondering: why does `SUM(cost)` come before the command to `GROUP BY type`? Don't we need to form groups before we can count the number of entries in each? Remember that SQL is a *declarative* programming language —— a SQL programmer simply states what end result they would like to see, and leaves the task of figuring out *how* to obtain this result to SQL itself. 
This means that SQL queries sometimes don't follow what a reader sees as a \"logical\" sequence of thought. Instead, SQL requires that we follow its set order of operations when constructing queries. So long as we follow this order, SQL will handle the underlying logic.\n", - "\n", - "In practical terms: our goal with this query was to output the total `cost`s of each `type`. To communicate this to SQL, we say that we want to `SELECT` the `SUM`med `cost` values for each `type` group. \n", - "\n", - "There are many aggregation functions that can be used to aggregate the data contained in each group. Some common examples are:\n", - "\n", - "* `COUNT`: count the number of rows associated with each group\n", - "* `MIN`: find the minimum value of each group\n", - "* `MAX`: find the maximum value of each group\n", - "* `SUM`: sum across all records in each group\n", - "* `AVG`: find the average value of each group\n", - "\n", - "We can easily compute multiple aggregations all at once (a task that was very tricky in `pandas`).\n" - ], - "id": "9ab651b9" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT type, SUM(cost), MIN(cost), MAX(name)\n", - "FROM Dish\n", - "GROUP BY type;" - ], - "id": "c9464164", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To count the number of rows associated with each group, we use the `COUNT` keyword. Calling `COUNT(*)` will compute the total number of rows in each group, including rows with null values. 
Its `pandas` equivalent is `.groupby().size()`.\n", - "\n", - "Recall the `Dragon` table from the previous lecture:\n" - ], - "id": "96323771" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT * FROM Dragon;" - ], - "id": "b6299c32", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice that `COUNT(*)` and `COUNT(cute)` result in different outputs.\n" - ], - "id": "03b20f58" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT year, COUNT(*)\n", - "FROM Dragon\n", - "GROUP BY year;" - ], - "id": "a12cdba2", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT year, COUNT(cute)\n", - "FROM Dragon\n", - "GROUP BY year;" - ], - "id": "3688a2ac", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With this definition of `GROUP BY` in hand, let's update our SQL order of operations. Remember: *every* SQL query must list clauses in this order. \n", - "\n", - " SELECT \n", - " FROM
\n", - " [WHERE ]\n", - " [GROUP BY ]\n", - " [ORDER BY ]\n", - " [LIMIT ]\n", - " [OFFSET ];\n", - "\n", - "Note that we can use the `AS` keyword to rename columns during the selection process and that column expressions may include aggregation functions (`MAX`, `MIN`, etc.).\n", - "\n", - "## Filtering Groups\n", - "\n", - "Now, what if we only want groups that meet a certain condition? `HAVING` filters groups by applying some condition across all rows in each group. We interpret it as a way to keep only the groups `HAVING` some condition. Note the difference between `WHERE` and `HAVING`: we use `WHERE` to filter rows, whereas we use `HAVING` to filter *groups*. `WHERE` precedes `HAVING` in terms of how SQL executes a query.\n", - "\n", - "Let's take a look at the `Dish` table to see how we can use `HAVING`. Say we want to group dishes with a cost greater than 4 by `type` and only keep groups where the max cost is less than 10.\n" - ], - "id": "5857ec62" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT type, COUNT(*)\n", - "FROM Dish\n", - "WHERE cost > 4\n", - "GROUP BY type\n", - "HAVING MAX(cost) < 10;" - ], - "id": "99bfd356", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we first use `WHERE` to filter for rows with a cost greater than 4. We then group our values by `type` before applying the `HAVING` operator. With `HAVING`, we can filter our groups based on if the max cost is less than 10.\n", - "\n", - "## Summary: SQL\n", - "With this definition of `GROUP BY` and `HAVING` in hand, let's update our SQL order of operations. Remember: *every* SQL query must list clauses in this order. \n", - "\n", - " SELECT \n", - " FROM
\n", - " [WHERE ]\n", - " [GROUP BY ]\n", - " [ORDER BY ]\n", - " [LIMIT ]\n", - " [OFFSET ];\n", - "\n", - "Note that we can use the `AS` keyword to rename columns during the selection process and that column expressions may include aggregation functions (`MAX`, `MIN`, etc.).\n", - "\n", - "## EDA in SQL\n", - "In the last lecture, we mostly worked under the assumption that our data had already been cleaned. However, as we saw in our first pass through the data science lifecycle, we're very unlikely to be given data that is free of formatting issues. With this in mind, we'll want to learn how to clean and transform data in SQL. \n", - "\n", - "Our typical workflow when working with \"big data\" is:\n", - "\n", - "1. Use SQL to query data from a database\n", - "2. Use Python (with `pandas`) to analyze this data in detail\n", - "\n", - "We can, however, still perform simple data cleaning and re-structuring using SQL directly. To do so, we'll use the `Title` table from the `imdb_duck` database, which contains information about movies and actors.\n", - "\n", - "Let's load in the `imdb_duck` database.\n" - ], - "id": "80483946" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "import os\n", - "if os.path.exists(\"/home/jovyan/shared/sql/imdb_duck.db\"):\n", - " imdbpath = \"duckdb:////home/jovyan/shared/sql/imdb_duck.db\"\n", - "elif os.path.exists(\"data/imdb_duck.db\"):\n", - " imdbpath = \"duckdb:///data/imdb_duck.db\"\n", - "else:\n", - " import gdown\n", - " url = 'https://drive.google.com/uc?id=10tKOHGLt9QoOgq5Ii-FhxpB9lDSQgl1O'\n", - " output_path = 'data/imdb_duck.db'\n", - " gdown.download(url, output_path, quiet=False)\n", - " imdbpath = \"duckdb:///data/imdb_duck.db\"\n", - "print(imdbpath)" - ], - "id": "b5eb5a70", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "from sqlalchemy import 
create_engine\n", - "imdb_engine = create_engine(imdbpath, connect_args={'read_only': True})\n", - "%sql imdb_engine --alias imdb" - ], - "id": "c8201903", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since we'll be working with the `Title` table, let's take a quick look at what it contains. \n" - ], - "id": "09166ab8" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql imdb \n", - " \n", - "SELECT *\n", - "FROM Title\n", - "WHERE primaryTitle IN ('Ginny & Georgia', 'What If...?', 'Succession', 'Veep', 'Tenet')\n", - "LIMIT 10;" - ], - "id": "01b5d2f2", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Matching Text using `LIKE`\n", - "\n", - "One common task we encountered in our first look at EDA was needing to match string data. For example, we might want to remove entries beginning with the same prefix as part of the data cleaning process.\n", - "\n", - "In SQL, we use the `LIKE` operator to (you guessed it) look for strings that are *like* a given string pattern. \n" - ], - "id": "ffef470c" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT titleType, primaryTitle\n", - "FROM Title\n", - "WHERE primaryTitle LIKE 'Star Wars: Episode I - The Phantom Menace'" - ], - "id": "796c9947", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "What if we wanted to find *all* Star Wars movies? `%` is the wildcard operator, it means \"look for any character, any number of times\". 
This makes it helpful for identifying strings that are similar to our desired pattern, even when we don't know the full text of what we aim to extract.\n" - ], - "id": "3fe091dc" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT titleType, primaryTitle\n", - "FROM Title\n", - "WHERE primaryTitle LIKE '%Star Wars%'\n", - "LIMIT 10;" - ], - "id": "aeb204b2", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, we can use RegEx! DuckDB and most real DBMSs allow for this. Note that here, we have to use the `SIMILAR TO` operater rather than `LIKE`.\n" - ], - "id": "4ccf1358" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT titleType, primaryTitle\n", - "FROM Title\n", - "WHERE primaryTitle SIMILAR TO '.*Star Wars*.'\n", - "LIMIT 10;" - ], - "id": "6d2d17f3", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `CAST`ing Data Types\n", - "\n", - "A common data cleaning task is converting data to the correct variable type. The `CAST` keyword is used to generate a new output column. Each entry in this output column is the result of converting the data in an existing column to a new data type. For example, we may wish to convert numeric data stored as a string to an integer.\n" - ], - "id": "4dff6841" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT primaryTitle, CAST(runtimeMinutes AS INT)\n", - "FROM Title;" - ], - "id": "33d3172b", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use `CAST` when `SELECT`ing colunns for our output table. 
In the example above, we want to `SELECT` the columns of integer year and runtime data that is created by the `CAST`. \n", - "\n", - "SQL will automatically name a new column according to the command used to `SELECT` it, which can lead to unwieldy column names. We can rename the `CAST`ed column using the `AS` keyword.\n" - ], - "id": "e06a79ff" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "SELECT primaryTitle AS title, CAST(runtimeMinutes AS INT) AS minutes, CAST(startYear AS INT) AS year\n", - "FROM Title\n", - "LIMIT 5;" - ], - "id": "2e169b4e", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using Conditional Statements with `CASE`\n", - "\n", - "When working with `pandas`, we often ran into situations where we wanted to generate new columns using some form of conditional statement. For example, say we wanted to describe a film title as \"old,\" \"mid-aged,\" or \"new,\" depending on the year of its release.\n", - "\n", - "In SQL, conditional operations are performed using a `CASE` clause. Conceptually, `CASE` behaves much like the `CAST` operation: it creates a new column that we can then `SELECT` to appear in the output. The syntax for a `CASE` clause is as follows:\n", - "\n", - " CASE WHEN THEN \n", - " WHEN THEN \n", - " ...\n", - " ELSE \n", - " END\n", - "\n", - "Scanning through the skeleton code above, you can see that the logic is similar to that of an `if` statement in Python. The conditional statement is first opened by calling `CASE`. Each new condition is specified by `WHEN`, with `THEN` indicating what value should be filled if the condition is met. `ELSE` specifies the value that should be filled if no other conditions are met. Lastly, `END` indicates the end of the conditional statement; once `END` has been called, SQL will continue evaluating the query as usual. 
\n", - "\n", - "Let's see this in action. In the example below, we give the new column created by the `CASE` statement the name `movie_age`.\n" - ], - "id": "85d0694f" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "/* If a movie was filmed before 1950, it is \"old\"\n", - "Otherwise, if a movie was filmed before 2000, it is \"mid-aged\"\n", - "Else, a movie is \"new\" */\n", - "\n", - "SELECT titleType, startYear,\n", - "CASE WHEN startYear < 1950 THEN 'old'\n", - " WHEN startYear < 2000 THEN 'mid-aged'\n", - " ELSE 'new'\n", - " END AS movie_age\n", - "FROM Title;" - ], - "id": "f4d903f1", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## `JOIN`ing Tables\n", - "\n", - "At this point, we're well-versed in using SQL as a tool to clean, manipulate, and transform data in a table. Notice that this sentence referred to one *table*, specifically. What happens if the data we need is distributed across multiple tables? This is an important consideration when using SQL —— recall that we first introduced SQL as a language to query from databases. Databases often store data in a multidimensional structure. In other words, information is stored across several tables, with each table containing a small subset of all the data housed by the database. \n", - "\n", - "A common way of organizing a database is by using a **star schema**. A star schema is composed of two types of tables. A **fact table** is the central table of the database —— it contains the information needed to link entries across several **dimension tables**, which contain more detailed information about the data. \n", - "\n", - "Say we were working with a database about boba offerings in Berkeley. The dimension tables of the database might contain information about tea varieties and boba toppings. 
The fact table would be used to link this information across the various dimension tables.\n", - "\n", - "
\n", - "multidimensional\n", - "
\n", - "\n", - "If we explicitly mark the relationships between tables, we start to see the star-like structure of the star schema.\n", - "\n", - "
\n", - "star\n", - "
\n", - "\n", - "To join data across multiple tables, we'll use the (creatively named) `JOIN` keyword. We'll make things easier for now by first considering the simpler `cats` dataset, which consists of the tables `s` and `t`.\n", - "\n", - "
\n", - "cats\n", - "
\n", - "\n", - "To perform a join, we amend the `FROM` clause. You can think of this as saying, \"`SELECT` my data `FROM` tables that have been `JOIN`ed together.\" \n", - "\n", - "Remember: SQL does not consider newlines or whitespace when interpreting queries. The indentation given in the example below is to help improve readability. If you wish, you can write code that does not follow this formatting.\n", - "\n", - " SELECT \n", - " FROM table_1 \n", - " JOIN table_2 \n", - " ON key_1 = key_2;\n", - "\n", - "We also need to specify what column from each table should be used to determine matching entries. By defining these keys, we provide SQL with the information it needs to pair rows of data together.\n", - "\n", - "\n", - "The most commonly used type of SQL `JOIN` is the **inner join**. It turns out you're already familiar with what an inner join does, and how it works – this is the type of join we've been using in `pandas` all along! In an inner join, we combine every row in our first table with its matching entry in the second table. If a row from either table does not have a match in the other table, it is omitted from the output. \n", - "\n", - "
\n", - "inner\n", - "
\n", - "\n", - "In a **cross join**, *all* possible combinations of rows appear in the output table, regardless of whether or not rows share a matching key. Because all rows are joined, even if there is no matching key, it is not necessary to specify what keys to consider in an `ON` statement. A cross join is also known as a cartesian product.\n", - "\n", - "
\n", - "cross\n", - "
\n", - "\n", - "Conceptually, we can interpret an inner join as a cross join, followed by removing all rows that do not share a matching key. Notice that the output of the inner join above contains all rows of the cross join example that contain a single color across the entire row.\n", - "\n", - "In a **left outer join**, *all* rows in the left table are kept in the output table. If a row in the right table shares a match with the left table, this row will be kept; otherwise, the rows in the right table are omitted from the output. We can fill in any missing values with `NULL`.\n", - "\n", - "
\n", - "left\n", - "
\n", - "\n", - "A **right outer join** keeps all rows in the right table. Rows in the left table are only kept if they share a match in the right table. Again, we can fill in any missing values with `NULL`. \n", - "\n", - "
\n", - "right\n", - "
\n", - "\n", - "In a **full outer join**, all rows that have a match between the two tables are joined together. If a row has no match in the second table, then the values of the columns for that second table are filled with `NULL`. In other words, a full outer join performs an inner join *while still keeping* rows that have no match in the other table. This is best understood visually:\n", - "\n", - "
\n", - "full\n", - "
\n", - "\n", - "We have kept the same output achieved using an inner join, with the addition of partially null rows for entries in `s` and `t` that had no match in the second table. \n", - "\n", - "### Aliasing in `JOIN`s\n", - "\n", - "When joining tables, we often create aliases for table names (similarly to what we did with column names in the last lecture). We do this as it is typically easier to refer to aliases, especially when we are working with long table names. We can even reference columns using aliased table names!\n", - "\n", - "Let's say we want to determine the average rating of various movies. We'll need to `JOIN` the `Title` and `Rating` tables and can create aliases for both tables.\n" - ], - "id": "c0385d65" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "\n", - "SELECT primaryTitle, averageRating\n", - "FROM Title AS T INNER JOIN Rating AS R\n", - "ON T.tconst = R.tconst;" - ], - "id": "c2180d4d", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the `AS` is actually optional! We can create aliases for our tables even without it, but we usually include it for clarity.\n" - ], - "id": "20658eb7" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "\n", - "SELECT primaryTitle, averageRating\n", - "FROM Title T INNER JOIN Rating R\n", - "ON T.tconst = R.tconst;" - ], - "id": "aa673057", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common Table Expressions\n", - "\n", - "For more sophisticated data problems, the queries can become very complex. Common table expressions (CTEs) allow us to break down these complex queries into more manageable parts. 
To do so, we create temporary tables corresponding to different aspects of the problem and then reference them in the final query: \n", - "\n", - " WITH \n", - " table_name1 AS ( \n", - " SELECT ...\n", - " ),\n", - " table_name2 AS ( \n", - " SELECT ...\n", - " )\n", - " SELECT ... \n", - " FROM \n", - " table_name1, \n", - " table_name2, ...\n", - "\n", - "Let's say we want to identify the top 10 action movies that are highly rated (with an average rating greater than 7) and popular (having more than 5000 votes), along with the primary actors who are the most popular. We can use CTEs to break this query down into separate problems. Initially, we can filter to find good action movies and prolific actors separately. This way, in our final join, we only need to change the order.\n" - ], - "id": "1a662cf2" - }, - { - "cell_type": "code", - "metadata": { - "vscode": { - "languageId": "python" - } - }, - "source": [ - "%%sql\n", - "WITH \n", - "good_action_movies AS (\n", - " SELECT *\n", - " FROM Title T JOIN Rating R ON T.tconst = R.tconst \n", - " WHERE genres LIKE '%Action%' AND averageRating > 7 AND numVotes > 5000\n", - "),\n", - "prolific_actors AS (\n", - " SELECT N.nconst, primaryName, COUNT(*) as numRoles\n", - " FROM Name N JOIN Principal P ON N.nconst = P.nconst\n", - " WHERE category = 'actor'\n", - " GROUP BY N.nconst, primaryName\n", - ")\n", - "SELECT primaryTitle, primaryName, numRoles, ROUND(averageRating) AS rating\n", - "FROM good_action_movies m, prolific_actors a, principal p\n", - "WHERE p.tconst = m.tconst AND p.nconst = a.nconst\n", - "ORDER BY rating DESC, numRoles DESC\n", - "LIMIT 10;" - ], - "id": "b4b494f7", - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "jupytext": { - "text_representation": { - "extension": ".qmd", - "format_name": "quarto", - "format_version": "1.0", - "jupytext_version": "1.16.1" - } - } - 
}, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file