Some general instructions on computing and the SPSS package are given first below. It makes most sense to read these together with the instructions for individual computer classes.
To access IT facilities at LSE you need an IT account with a username and password. Please see http://www.lse.ac.uk/intranet/LSEServices/IMT/guides/accounts/activateAccount.aspx for instructions on how to activate your account. In case of any problems, please ask for assistance at the IT help desk (Library 1st floor).
Various introductory documents can be accessed through the IMT services web pages at http://www.lse.ac.uk/intranet/LSEServices/IMT/home.aspx.
Logging in to use Windows: When you arrive at a networked computer, wait for Windows to start up (if the machine is not already on). Press CTRL + ALT + Delete and the Enter Network Password screen will appear. Type in your username and your password and press Enter or click on the OK button. This will log you on to the computer.
The instructions for each class will give the name of a file or files which will be used for that exercise. In order to do the class, you will need to download the file to your H: space (i.e. your personal file storage space on the LSE network, shown as disk drive H: on a networked computer once you have logged on). You can download all the data files for the course, as well as other course-related material, from the web-based Moodle system. See instructions in the beginning of this book for how to register for MY451 on Moodle.
+SPSS (formerly Statistical Package for the Social Sciences) is a widely used general-purpose statistical software package. It will be used for all the computer classes on this course. The current version on the LSE network is SPSS 21. This section gives some general information on the structure and use of SPSS. The discussion is brief and not meant to be comprehensive. The instructions given here and in the descriptions of individual computer classes below will be sufficient for the purposes of this course. If, however, you wish to find out more about SPSS, more information and examples can be found in the SPSS help files and tutorials found under the Help menu of the program, and in introductory guide books such as
Field, A. (2013). Discovering Statistics using IBM SPSS Statistics (4th ed). Sage.
Kinnear, P. R. and Gray, C. D. (2012). SPSS 19 Made Simple. Psychology Press.
Pallant, J. (2013). SPSS Survival Manual (5th ed). Open University Press.
+These are given here purely as examples (there are many others) and not as recommendations. We have not reviewed any of these books in detail and so cannot make any comparisons between them.
+To start SPSS, double-click on the SPSS icon on the Windows desktop. Alternatively, click on the Start button at the bottom left corner, and select All Programs, then Specialist and teaching software, Statistics, SPSS, and finally SPSS 21 (or some obvious variant of these, in case the exact wording on your desktop is slightly different).
+An initial screen for opening data files appears. Click on Cancel to get rid of this and to enter the data editor (which will be discussed further below).
+Select Exit from the File menu or click on the X at the upper right corner of the SPSS data editor window. You may then be prompted to save the information in the open windows; in particular, you should save the contents of the data editor in a file (see below) if you have made any changes to it.
+There are several different types of windows in SPSS. The two most important are
+Data editor: A data set is displayed in the Data Editor window. Several of these can be open at a time. The data editor which you have selected (clicked on) most recently defines the active data set, and the procedures you request from the menus are applied to this data set until you select a different active data set. The data editor window has two parts, accessed by clicking on the two tabs at the bottom of the window:
+Data view, which shows the data matrix in the spreadsheet-like form discussed in Section 1.2.1, with units in the rows and variables in the columns.
Variable view, which shows information about the variables.
Working with the data editor will be practised in the first computer class. The contents of the data editor, i.e. the data matrix and associated information, can be saved in an SPSS data file. Such files have names with the extension .sav.
Output viewer: Output from statistical analyses carried out on the data will appear here. The output can be printed directly from the viewer or copied and pasted to other programs. The contents of the viewer can also be saved in a file, with a name with the extension .spv (since version 17; in previous versions of SPSS the extension was .spo).
There are also other windows, for example for editing SPSS graphs. They will be discussed in the instructions to individual computer classes where necessary.
+Because analyses in SPSS are carried out by making choices from the menus, the instructions for the computer classes need to describe these choices somehow. To reduce the length and tedium of the instructions, we will throughout present them in a particular format explained below. Because this information is rather abstract if read in isolation, it is best to go through it while carrying out specific instructions for the first few computer classes.
+The appropriate menu choices for obtaining the dialog box for the required analysis are first given in bold, for example as follows:
Analyze/Descriptive statistics/Frequencies
+This is short for “Click on the menu item Analyze at the top of the window; from the drop-down menu, select Descriptive statistics and then click on Frequencies.” This particular choice opens a dialog box for constructing various descriptive statistics and graphs (as discussed in Chapter 2).
+Unless otherwise mentioned, subsequent instructions then refer to choices in the most recently opened dialog box, without repeating the full path to it.
For all of the statistical analyses, we need first to specify which variables the analyses should be applied to. This is done by entering the names of those variables in appropriate boxes in the dialog boxes. For example, the dialog box opened above has a box labelled Variable(s) for this purpose. The dialog box also includes a separate box containing a list of all the variables in the data set. The required variables are selected from this list and moved to the choice boxes (and back again, when choices are changed) by clicking on an arrow button between the boxes. For example, suppose that a data set contains a grouped age variable called AGEGROUP, for which we want to construct a frequency table. The class instructions may then state in words “Place AGEGROUP in the Variable(s) box”, or sometimes just
Variable(s)/AGEGROUP
+both of which are short for “In the dialog box opened above, click on the name AGEGROUP in the list of variables, and then click on the arrow button to move the name into the Variable(s) box”. Sometimes we may also use a generic instruction of the form
Variable(s)/<Variables>
where <Variables> indicates that this is where we would put the name of any variable for which we want to obtain a frequency table. Note that here and in many other procedures, it is possible to select several variables at once. For the Frequencies procedure used as an example here, this simply means that a separate frequency table is constructed for each selected variable.
Other choices in a dialog box determine details of the analysis and its output. In most cases the selection is made from a fixed list of possibilities provided by SPSS, by clicking on the appropriate box or button. In the instructions, the choice is indicated by listing a path to it, for example as
Charts/Chart Type/Bar charts
+in the above example (this requests the so-called bar chart). The items on such a list are labels for various items in the dialog boxes. For example, here Charts is a button which opens a new subsidiary dialog box, Chart Type is the title of a list of options in this new dialog box, and Bar charts is the choice we want to select. In other words, the above instruction is short for “In the dialog box opened above, click on the button Charts to open a new dialog box. Under Chart type, select Bar charts by clicking on a button next to it.”
Some choices need to be made by typing in some information rather than selecting from a list of options. Specific instructions for this will be given when needed.
After choices are made in subsidiary dialog boxes, we return to the main dialog box by clicking on Continue. Once all the required choices have been made, the analysis is executed by clicking on OK in the main dialog box. This should be reasonably obvious, so we will omit explicit instructions to do so.
A useful feature of SPSS is the dialog recall button, which is typically sixth from the left in the top row of buttons in the Output viewer window; the button shows a rectangle with a green arrow pointing down from it. Clicking on this gives a menu of recently used procedures, and choosing one of these brings up the relevant dialog box, with the previously used choices selected. This is useful when you want to rerun a procedure, e.g. to try different choices for its options. It is usually quicker to reopen a dialog box using the dialog recall button than through the menus.
+Various options for controlling the format of SPSS output and other features can be found under Edit/Options. For example, an often useful choice is General/Variable Lists/Display names. This instructs SPSS to display the names of variables in the variable lists of all procedures, instead of the (typically much longer) descriptive labels of the variables. In large data sets this may make it easier to find the right variables from the list. This may be further helped by selecting General/Variable Lists/Alphabetical, which causes the names to be listed in an alphabetical order rather than the order in which the variables are included in the data set.
+All the computers in the public rooms are connected to one of the laser printers. When you print a document or a part of it, you need to have credit on your printing account. See http://www.lse.ac.uk/intranet/LSEServices/IMT/guides/printing.aspx for further information.
+You can print your results from the Output Viewer either by selecting File/Print or by clicking on Print on the toolbar (the button with a little picture of a printer). Please note that SPSS output is often quite long, so this may result in much more printout than you really want.
Alternatively, in the Output Viewer, select the objects to be printed, select Edit / Copy, open a Word or Excel document and Paste. You can make any changes or corrections in this document before printing it. This method gives you more control over what gets printed than printing directly from SPSS.
At the printer terminal, type in your username and password. The files sent for printing are then listed. Select the appropriate file number and follow the instructions given by the computer.
Early versions of SPSS had no menu-based interface. Instead, commands were executed by specifying them in SPSS command language. This language is still there, underlying the menus, and each choice of commands and options from the menus can also be specified in the control language. We will not use this approach on this course, so you can ignore this section if you wish. However, there are some very good reasons why you might want to learn about the control language if you need to work with SPSS for, say, analyses for your thesis or dissertation:
+Because the control language commands can be saved in a file, they preserve a record of how an analysis was done. This may be important for checking that there were no errors, and for rerunning the analyses later if needed.
For repetitive analyses, modifying and rerunning commands in the control language is quicker and less tedious than using the menus repeatedly.
Some advanced SPSS procedures are not included in the menus, and can only be accessed through the control language.
The main cost of using the control language is learning its syntax. This is initially much harder than using the menus, but becomes easier with experience. The easiest way to begin learning the syntax is to request SPSS to print out the commands corresponding to choices made from the menus. Two easy ways of doing so are
+Selecting the session option (i.e. under Edit/Options) Viewer/Display commands in the log. This causes the commands corresponding to the menu choices to be displayed in the output window.
Clicking on the Paste button in a dialog box (instead of OK) after selecting an analysis. This opens a Syntax window where the corresponding commands are now displayed. The commands in a syntax window can be edited and executed, and also saved in a file (with the extension .sps) for future use.
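For example, if you request a frequency table and bar chart for a variable called AGEGROUP from the menus and click Paste instead of OK, the syntax window will contain a command along the following lines (a sketch only; the exact subcommands depend on the options you selected):

* Frequency table and bar chart for AGEGROUP (pasted-syntax sketch).
FREQUENCIES VARIABLES=AGEGROUP
  /BARCHART FREQ
  /ORDER=ANALYSIS.

The command can then be run by selecting it in the syntax window and choosing Run/Selection (or the green Run button).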
The data file ESS5_sample.sav will be used today. It contains a simplified sample of data from UK respondents in the 2010 European Social Survey (Round 5). The questions in the survey that you see here were designed by Dr Jonathan Jackson and his team as part of a module investigating public trust in the criminal justice system. Further information about the study can be found at
http://www.lse.ac.uk/methodology/whosWho/Jackson/jackson_ESS.aspx
The main purpose of today’s class is to introduce you to the layout of SPSS and to show you how to produce some basic tables and graphs for categorical variables. Additionally, we provide instructions on how to enter data into a new SPSS data file, using the Data Editor. This exercise is not strictly needed for the course, but we include it for two purposes. Firstly, students often find this a helpful way of learning how the software works. Secondly, this exercise may be a useful introduction for students who go on to collect or collate data for their own empirical research.
Opening an SPSS data file: this is done from File/Open/Data, selecting the required file from whichever folder it is saved in, in the usual Windows way. Do this to open ESS5_sample.sav.
Information in the Variable View window. The data file is now displayed in the Data Editor. Its Data View window shows the data as a spreadsheet (i.e. a data matrix). We will first consider the information in the Variable View window, accessed by clicking on the Variable View tab at the bottom left corner of the window. The columns of this window show various pieces of information about the variables. Take a little while to familiarise yourself with them. The most important of the columns in Variable View are
+Name of the variable in the SPSS data file. The names in this column (also shown as the column headings in Data View) will be used to refer to specific variables in all of the instructions for these computer classes.
Type of the variable. Here most of the variables are Numeric, i.e. numbers, and a few are String, which means text. Clicking on a variable’s entry in this column, and then on the button (with three dots on it) that this reveals, shows a list of other possibilities.
Width and Decimals control the total number of digits and the number of decimal places displayed in Data View. Clicking on an entry in these columns reveals buttons which can be used to increase or decrease these values. Here all but two of the numeric variables are coded as whole numbers, so Decimals has been set to 0 for them.
Label is used to enter a longer description of the variable. Double-clicking on an entry allows you to edit the text.
Values shows labels for individual values of a variable. This is mostly relevant for categorical variables, such as most of the ones in these data. Such variables are coded in the data set as numbers, and the Values entry maintains a record of the meanings of the categories the numbers correspond to. You can see examples of this by clicking on some of the entries in the Values column and then on the resulting button. The value labels can also be displayed for each observation in Data View by selecting View/Value Labels in that window.
Missing specifies missing data codes, i.e. values which are not actual measurements but indicators that an observation should be treated as missing. There may be several such codes. For example, variables in these data often have separate missing data codes for cases where a respondent was never asked a question (“Not applicable”, often abbreviated NAP), replied “Don’t know” (DK) or otherwise failed to provide an answer (“Refusal” or “No answer”; NA); the explanations of these values are found in the Values column. An alternative to using missing data codes (so-called User missing values) is to enter no value (a System missing value) for an observation in the data matrix. This is displayed as a full stop (.) in Data View. There are no such values in these data.
Measure indicates the measurement level of a variable, as Nominal, Ordinal or Scale (meaning interval). This is mostly for the user’s information, as SPSS makes little use of this specification.
Any changes made to the data file are preserved by saving it again from File/Save (or by clicking on the Save File button of the toolbar, which is the one with the picture of a diskette). You will also be prompted to do so when exiting SPSS or when trying to open a new data file. Today you should not save any changes you may have made to ESS5_sample.sav, so click No if prompted to do so below.
Most of the statistics required for this class are found in SPSS under Analyze/Descriptive Statistics/Frequencies as follows:
+Names of the variables for which the statistics are requested are placed in the Variable(s) box. To make it easy to find variables in the list box on the left, you may find it convenient to change the way the variables are displayed in the list; see under “SPSS Session Options” for instructions.
Tables of frequencies: select Display frequency tables
Bar charts: Charts/Chart Type/Bar charts. Note that under Chart Values you can choose between frequencies or percentage labels on the vertical axis.
Pie charts: Charts/Chart Type/Pie charts
In addition, we will construct some two-way tables or cross-tabulations, by selecting Analyze/Descriptive Statistics/Crosstabs. In the dialog box that opens, request a contingency table between two variables by entering
+The name of the row variable into the Row(s) box, and
The name of the column variable into the Column(s) box.
Cells/Percentages for percentages within the table: Row gives percentages within each row (i.e. frequencies divided by row totals), Column percentages within columns, and Total percentages out of the total sample size.
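If you later want to reproduce these analyses without clicking through the menus, the same requests can also be written in SPSS syntax. A minimal sketch for today's variables (GOODJOB and AGE_GRP), which you may adapt or ignore as you prefer:

* Frequency table and bar chart for GOODJOB; cross-tabulation of AGE_GRP by GOODJOB with row percentages.
FREQUENCIES VARIABLES=GOODJOB
  /BARCHART FREQ.
CROSSTABS /TABLES=AGE_GRP BY GOODJOB
  /CELLS=COUNT ROW.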
The labels in the SPSS output should be self-explanatory. Note that in this and all subsequent classes, the output may also include some entries corresponding to methods and statistics not discussed on this course. They can be ignored here.
+The first variable in the data set, GOODJOB, asks respondents whether they generally feel that the police are doing a good job in their country. There are three response categories for this item: “a good job”, “neither a good job nor a bad job”, or “a bad job”. Obtain a frequency table and bar chart to investigate the distribution of responses to this question.
+Check that you understand how to interpret the output you obtain. In particular, make sure that you understand the information displayed in each of the columns in the main table, and that you can see the connection between the information in the table and the information represented in the bar chart.
The last variable in the set, AGE_GRP, records in which of the following age groups each respondent falls: up to 29 years of age, 30-49, or 50+ years. Let us consider the association between age group and opinions of the police. Obtain a two-way contingency table of GOODJOB by AGE_GRP. To make interpretation easier, request percentages within each of the age groups. If you use AGE_GRP as the row variable, then include row percentages in the output.
+Interpret the resulting table. Are opinions of the police distributed differently among the three different age groups? Does there appear to be an association between age group and attitude?
If you have time after completing the data entry exercise (below), you may wish to return to this data set and explore frequencies and contingency tables for some of the other variables in the set.
This procedure may be useful to know if the data you are analysing are not in any electronic form at the beginning of the analysis, for example if you start with a pile of filled-in questionnaires from a survey. For practice, we will enter the following small, artificial data set:
Sex: Man; Age: 45; Weight: 11 st 3 lbs
Sex: Woman; Age: 68; Weight: 8 st 2 lbs
Sex: Woman; Age: 17; Weight: 9 st 6 lbs
Sex: Man; Age: 28; Weight: 13 st 8 lbs
Sex: Woman; Age: 16; Weight: 7 st 8 lbs
Select File/New/Data to clear the Data Editor. Go to Variable View and enter into the first four rows of the Name column names for the variables, for example sex, age, wstones and wpounds.
Switch to Data View and type the data above into the appropriate columns, one unit (respondent) per row. Note that the person’s weight is entered into two columns, one for stones and one for pounds. Enter sex using numerical codes, e.g. 1 for women and 2 for men.
Save the file as a new SPSS data file (File/Save as), giving it a name of your choice. You should also resave the file (from File/Save or by clicking the File Save button) after each of the changes made below.
Practise modifying the information in Variable View by adding the following information for the sex variable:
+Enter the label Sex of the respondent into the Label column.
Click on the Values cell and then on the resulting button to open a dialog box for entering value labels. Enter Value: 1; Value Label: Woman; Add. Repeat for men, and click OK when finished.
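The same labelling can also be done in syntax; a sketch, assuming the variable is called sex and is coded 1 for women and 2 for men as above:

* Variable and value labels for sex (illustrative sketch).
VARIABLE LABELS sex 'Sex of the respondent'.
VALUE LABELS sex 1 'Woman' 2 'Man'.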
Transforming variables: It is often necessary to derive new variables from existing ones. We will practise the two most common examples of this:
+Creating a grouped variable: Suppose, for example, that we want to define a grouped age variable with three categories: less than 25 years, 25–54 and 55 or over. This is done as follows:
+Select Transform/Recode into Different Variables. This opens a dialog box which is used to define the rule for how values of the existing variable are to be grouped into categories of the new one.
Move the name of the age variable to the Input Variable -> Output Variable box.
Under Output Variable, enter the Name of the new variable, for example agegroup, and click Change.
Click on Old and New Values. Enter Old Value/Range: Lowest through 24 and New Value/Value: 1, and click Add.
Repeat for the other two categories, selecting Range: 25 through 54 and Range: 55 through highest for Old value, and 2 and 3 respectively for New value.
You should now see the correct grouping instructions in the Old -> New box. Click Continue and OK to create the new variable.
Check the new variable in Data View. At this stage you should normally enter in Variable View the value labels of the age groups.
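For reference, the same grouping can be written in syntax; a sketch assuming the variable names age and agegroup used above:

* Recode age into three groups and label them (illustrative sketch).
RECODE age (LOWEST THRU 24=1) (25 THRU 54=2) (55 THRU HIGHEST=3) INTO agegroup.
VALUE LABELS agegroup 1 'Under 25' 2 '25-54' 3 '55 or over'.
EXECUTE.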
Calculations on variables: Some new variables are obtained through mathematical calculations on existing ones. For example, suppose we want to include weight in kilograms as well as stones and pounds. Using the information that one stone is equal to 6.35 kg and one pound is about 0.45 kg, the transformation is carried out as follows:
+Select Transform/Compute Variable. This opens a dialog box which is used to define the rule for calculating the values of the new variable.
Enter Target variable: weightkg (for example; this is the name of the new variable) and Numeric Expression: 6.35 * wstones + 0.45 * wpounds; for the latter, you can either type in the formula or use the variable list and calculator buttons in a fairly obvious way.
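In syntax, the same calculation could be written as follows (a sketch using the variable names above):

* Convert weight in stones and pounds to kilograms (illustrative sketch).
COMPUTE weightkg = 6.35*wstones + 0.45*wpounds.
EXECUTE.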
The homework exercise for this week is to complete the multiple choice quiz which you can find in the Moodle resource for MY451. Answers to the questions are also included there, including feedback on why the incorrect answers are incorrect. The first part of the quiz asks for answers to the class exercise, and the second part asks you to identify the level of measurement of some different variables.
Descriptive statistics for continuous variables
+Data set: The data file used today is london-borough-profiles.sav. It contains a selection of data on the 33 London boroughs obtained from the London Datastore, which publishes a range of statistical data about the city, collated by the Greater London Authority’s GLA Intelligence Unit.^[The data were obtained from http://data.london.gov.uk/datastore/package/london-borough-profiles.
If you download the “Profiles in Excel” workbook, you will find that one of the pages contains a map of the boroughs, and a tool for visualising the data on that map. A regular map of the boroughs can be found, for example, at
+http://www.londoncouncils.gov.uk/londonfacts/londonlocalgovernment/londonmapandlinks/default.htm.]
+This week you will produce and examine descriptive statistics for a number of individual variables. As for last week, almost all of the statistics required for this class can be obtained in SPSS under Analyze/Descriptive Statistics/Frequencies. Note that you will probably not find the tables of frequencies very useful, because continuous variables can take so many different values. So for this class, uncheck the Display frequency tables option in the dialog box.
Measures of central tendency: Mean, Median and Mode under Statistics/Central Tendency
Measures of variation: Range, Std. deviation and Variance under Statistics/Dispersion. For the Interquartile range, select Statistics/ Percentile values/Quartiles and calculate by hand the difference between the third and first quartiles given (as Percentiles 75 and 25 respectively) in the output.
Histograms: Charts/Chart Type/Histograms
Two charts needed today are not found under the Frequencies procedure:
+Stem and leaf plots, which are obtained from Analyze/Descriptive Statistics/Explore by entering variable(s) under Dependent list and selecting Display/Plots and Plots/Descriptive/Stem-and-leaf. You can place more than one variable under the Dependent list in order to compare variables.
Box plots are also automatically generated through this dialog box, regardless of whether you want to see them! So this is the simplest way to produce them.
Most of these statistics and charts can be obtained in other ways as well, for example from Analyze/ Descriptive Statistics/Descriptives or Graphs/Legacy Dialogs/Histogram, or Graphs/Legacy Dialogs/Boxplot, but we will not use these alternatives today. Feel free to investigate them in your own time if you wish.
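If you prefer to work from syntax, the following sketch requests comparable statistics and charts for one variable (here YOUTH_DEPRIVATION, used in the first exercise below); the exact subcommands depend on the options you tick in the dialog boxes:

* Summary statistics, histogram, stem and leaf plot and box plot for YOUTH_DEPRIVATION (sketch).
FREQUENCIES VARIABLES=YOUTH_DEPRIVATION
  /FORMAT=NOTABLE
  /STATISTICS=MEAN MEDIAN MODE STDDEV VARIANCE RANGE MINIMUM MAXIMUM
  /PERCENTILES=25 50 75
  /HISTOGRAM.
EXAMINE VARIABLES=YOUTH_DEPRIVATION
  /PLOT=BOXPLOT STEMLEAF
  /STATISTICS=NONE.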
+The labels in the SPSS output should be self-explanatory. Note that in this and all subsequent classes, the output may also include some entries corresponding to methods and statistics not discussed on this course. They can be ignored here.
+The variable YOUTH_DEPRIVATION records for each borough the percentage of children who live in out-of-work families. This is an indicator of deprivation, with higher values indicating a worse situation for each borough. Investigate the distribution of this variable across London boroughs by obtaining its mean, median, minimum and maximum, quartiles and standard deviation, and a histogram. Obtain also a stem and leaf plot and a box plot. Note that double-clicking on a histogram (or any other SPSS graph) opens it in a new window, where the graph can be further edited by changing titles, colours etc. The graph can also be exported from SPSS into other software. Check that you understand how to find the measures of central tendency and dispersion from the output. Does the distribution of YOUTH_DEPRIVATION appear to be symmetrically distributed or skewed?
Consider now the variable CRIME, which records the numbers of reported crimes for every 1000 inhabitants, over the years 2011-12. Obtain some summary descriptive statistics, a histogram and a box plot for this variable. Is the distribution of the variable symmetric or skewed to the left or right? CRIME is one of many variables in this data set which have outliers, i.e. boroughs with unusually large or small values of the variable. Normally statistical analysis focuses on the whole data rather than individual observations, but the identities of individual outliers are often also of interest. The outliers can be seen most easily in the box plots, where SPSS labels them with their case numbers, so that you can identify them easily in the data set. For example, 1 would indicate the 1st case in the data set. If you click on to the Data View you can see that this 1st case is the City of London. Which borough is the outlier for CRIME?
For the questions below, select the relevant SPSS output to include in your homework and write brief answers to the specific questions. Remember SPSS produces some outputs that you do not need. Feel free to transcribe tables or modify charts if you wish to improve their presentation.
The variable VOTING records voter turnout in a borough, specifically the percentage of eligible voters who voted in the local elections in 2010. Obtain descriptive statistics, a histogram and a box plot for this variable. What is the range of the variable, and what is its inter-quartile range? Are there any outliers? Is the distribution of voter turnout symmetrical or skewed? How can you tell?
In the data set employment rates are given overall, but also separately for males and females. The employment rate is the percentage of working age population who are in employment. Compare and contrast male and female employment rates across the boroughs, using the variables MALE_EMPLOYMENT and FEMALE_EMPLOYMENT. Comment on the differences and/or similarities in their descriptive statistics: minimum and maximum, mean, median and standard deviation. Obtain histograms for these two variables. Are the distributions of male employment and female employment symmetrical or skewed?
Data set: The data file used today is GSS2010.sav. It contains a selection of variables on attitudes and demographic characteristics for 2044 respondents in the 2010 U.S. General Social Survey (GSS).60 The full data set contains 790 variables. For convenience the version you are analysing today contains just a selection of those items.
+All of the analyses needed for this week’s class are found under Analyze/Descriptive Statistics/Crosstabs. We will be obtaining contingency tables between two variables, as in Week 2 class, with the following commands:
+The name of the row variable into the Row(s) box, and
The name of the column variable into the Column(s) box.
Cells/Percentages for percentages within the table: Row gives percentages within each row (i.e. frequencies divided by row totals), Column percentages within columns, and Total percentages out of the total sample size.
The only additional output we will need today is obtained by selecting
Statistics/Chi-square for the \(\chi^{2}\) test of independence
(If you are interested in the \(\gamma\) measure of association for ordinal variables, outlined in the coursepack, you may obtain it using Statistics/Ordinal/Gamma. In the output the \(\gamma\) statistic is shown in the “Symmetric measures” table in the “Value” column for “Gamma”. We will not use this measure today, but feel free to ask if you are interested in it.)
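As a syntax sketch (using the variables of the first exercise below), the two-way table with row percentages and the \(\chi^{2}\) test could be requested as:

* Cross-tabulation of SEX by FEFAM with row percentages and chi-square test (sketch).
CROSSTABS /TABLES=SEX BY FEFAM
  /CELLS=COUNT ROW
  /STATISTICS=CHISQ.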
Suppose we want to use the GSS data to investigate whether in the U.S. population sex and age are associated with attitudes towards women’s roles. The respondent’s sex is included in the data as the variable SEX, and age as AGEGROUP in three groups: 18-34, 35-54, and 55 or over. The three attitude variables we consider are
+FEFAM: Level of agreement with the following statement: “It is much better for everyone involved if the man is the achiever outside the home and the woman takes care of the home and family”. Available response options are Strongly agree, Agree, Disagree, and Strongly disagree.
FEPOL: Level of agreement with the following statement: “Most men are better suited emotionally for politics than are most women”. Available response options are: Agree and Disagree.
FEPRES: Response to the following statement: “If your party nominated a woman for President, would you vote for her if she were qualified for the job?” Available response options are Yes and No.
Consider first the association between sex and attitude towards male and female work roles, by constructing a contingency table between SEX and FEFAM. To make interpretation of the results easier, include also appropriate percentages. Here it makes most sense to treat sex as an explanatory variable for attitude, so we want to examine percentages of attitudes within categories of male and female. If you use SEX as the row variable, this means including the Row percentages in the output. Request also the \(\chi^{2}\)-test statistic. In SPSS output, results for the \(\chi^{2}\) test are given below the two-way table itself in a table labelled “Chi-Square Tests”, in the row “Pearson Chi-Square”. The test statistic itself is given under “Value” and its \(P\)-value under “Asymp. Sig. (2-sided)”. By considering the \(\chi^{2}\) test statistic and its \(P\)-value, do you think there is enough evidence to conclude that males and females differ in their views on male and female work roles? If there is, how would you describe the association?
Consider now the association between age and attitude towards male and female work roles, by constructing a table between AGEGROUP and FEFAM. Interpret the results, and compare them to your findings in Exercise 1.
Examine differences between men and women in their views about women’s suitability for politics, using a table between SEX and FEPOL. Interpret the results. (Note: ignore the last two columns of the \(\chi^{2}\) test output, labelled “Exact Sig. (2-sided)” and “Exact Sig. (1-sided)”, and use the result under “Asymp. Sig. (2-sided)” as in the other tables.)
What is the null hypothesis for the \(\chi^{2}\) test that you carried out in analysis 2 in the class, for the table of AGEGROUP by FEFAM?
State the \(\chi^{2}\) test statistic, degrees of freedom and \(P\)-value for this table, and interpret these results.
Interpret the table of percentages to describe the nature of the association between AGEGROUP and FEFAM.
Consider now the association between age and attitude towards voting for a female President, by constructing a table between AGEGROUP and FEPRES. In the population, do people in different age groups differ in their willingness to vote for a female President? Interpret the results of the \(\chi^{2}\) test and illustrate your answer with one or two percentages from the two-way table.
Data set: The data file used today is ESS5_GBFR.sav. It contains data for a selection of variables from the 2010 European Social Survey for respondents in Great Britain and France.61 Only a few of the variables are used in the exercises; the rest are included in the data set as examples of the kinds of information obtained from this survey.
Two-sample inference for means in SPSS
+\(t\)-tests and confidence intervals for two independent samples for inference on the difference of the population means: Analyze/Compare Means/Independent-Samples T Test. The variable of interest \(Y\) is placed under Test Variable(s) and the explanatory variable \(X\) under Grouping Variable. The values of \(X\) identifying the two groups being compared are defined under Define Groups.
Box plots for descriptive purposes are obtained from Analyze/Descriptive Statistics/Explore. Here we want to draw side-by-side box plots for values of a response variable \(Y\), one plot for each distinct value of an explanatory variable \(X\). The name of \(Y\) is placed under Dependent List and that of \(X\) under Factor List. Box plots are obtained by selecting Plots/Boxplots/Factor levels together.
Tests and confidence intervals for single means (c.f. Section 7.4) are not considered today. These are obtained from Analyze/Compare Means/One-Sample T Test. They can also be used to carry out inference for comparisons of means between two dependent samples (c.f. Section 7.5).
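A syntax sketch of the box plots and the two-sample \(t\)-test for the first exercise below, assuming the grouping variable CNTRY is coded GB and FR as in the data:

* Side-by-side box plots of WKHTOT by country, then the two-sample t-test (sketch).
EXAMINE VARIABLES=WKHTOT BY CNTRY
  /PLOT=BOXPLOT
  /STATISTICS=DESCRIPTIVES.
T-TEST GROUPS=CNTRY('GB' 'FR')
  /VARIABLES=WKHTOT
  /CRITERIA=CI(.95).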
Classwork: Consider the survey data in the file ESS5_GBFR.sav. We will examine two variables, and carry out statistical inference to compare their means among the survey populations of adults in Great Britain and France.62
+The variable WKHTOT shows the number of hours per week the respondent normally works in his or her main job. Obtain box plots and descriptive statistics for this variable separately for each country (identified by the variable CNTRY). Compare measures of central tendency and variation for WKHTOT between the two countries. What do you observe?
Obtain a \(t\)-test and confidence interval for the difference of weekly working hours between Britain and France (specify the values of the country variable as Define Groups/Group 1: GB and Group 2: FR as coded in the data). Details of SPSS output for this are explained in Chapter 7; you can use the results under the assumption of equal population variances. What do you conclude? Is there a statistically significant difference in the average values of WKHTOT between the two countries? What does the confidence interval suggest about the size of the difference?
The variable STFJBOT asks those in paid work, “How satisfied are you with the balance between the time you spend on your paid work and the time you spend on other aspects of your life?”. Respondents are asked to rate their level of satisfaction on a scale from 0-10, where 0 means “Extremely dissatisfied” and 10 means “Extremely satisfied”. Repeat exercises 1 and 2 for this variable, and compare also histograms of STFJBOT for each country. What do you observe?
HOMEWORK
Write up your answers to the second class exercise, answering these specific questions:
+What are the observed sample means for WKHTOT for French and British respondents?
Is there a statistically significant difference in the average values of WKHTOT between the two countries? State the value of the test statistic and its corresponding \(P\)-value. You may assume equal population variances for this test.
Interpret the 95% confidence interval for the difference.
The variable WKHSCH asks respondents, “How many hours a week, if any, would you choose to work, bearing in mind that your earnings would go up or down according to how many hours you work?”. Is there a statistically significant difference between ideal (rather than actual) work hours for French and British respondents? Carry out a t-test and report and interpret the results.
The variable STFMJOB asks respondents, “How satisfied are you in your main job?”. Respondents are asked to rate their level of satisfaction on a scale from 0-10, where 0 means “Extremely dissatisfied” and 10 means “Extremely satisfied”. Is there a statistically significant difference, at the 5% level of significance, between mean levels of job satisfaction for French and British respondents? Answer this question by using the 95% confidence interval for the difference in means (you need the full t-test output to obtain the confidence interval, but you need not report the results of the t-test itself for this question).
Data sets: Files BES2010post_lastdebate.sav and BES2010pre_lastdebate.sav.
Inference on proportions in SPSS
+SPSS menus do not provide procedures for calculating the tests and confidence intervals for proportions discussed in Chapter 5. This is not a serious limitation, as the calculations are quite simple.
It is probably easiest to use a pocket calculator for the calculations, and this is the approach we recommend for this class. The only part of the analysis it cannot do is calculating the precise \(P\)-value for the tests, but even this can be avoided by using critical values from a statistical table such as the one at the end of this Coursepack to determine approximate \(P\)-values (or by using an online \(P\)-value calculator — see “About Week 4 class” on the Moodle page for suggested links).
The survey data set BES2010post_lastdebate.sav contains part of the information collected by the British Election Study, an ongoing research programme designed to understand voter choices in the UK.63
+In the run-up to the UK General Election on 6 May 2010, opinion polls reported quite dramatic changes in popularity of the Liberal Democrat party. Key to their increasing popularity was the performance of their party leader, Nick Clegg, in a series of three televised debates between the leaders of the three main political parties (the other participants were Gordon Brown for Labour and David Cameron for the Conservative party). The debates were broadcast between 15 and 29 April 2010.
+The data in BES2010post_lastdebate.sav contain information on respondents’ voting intentions, obtained after the debates had ended (i.e. between 30 April and 6 May).
+VOTE_LIBDEM is a dichotomous variable indicating whether a respondent intended to vote for the Liberal Democrats (value 1) or some other party (0) in the 2010 General Election. The value of this variable is by definition missing for those who had not decided which way they would vote or who did not intend to vote at all, so they are automatically excluded from the analysis. The parameter of interest \(\pi\) is now the population proportion of those who say they would vote Liberal Democrat. We will compare it to 0.23, the proportion of the vote the party actually received in 2010. The analysis is thus one-sample inference on a population proportion, and the relevant formulas are (11) for the test statistic and (15) for the confidence interval that can be found in Section 5.5.2 and 5.6.2 respectively.
+Begin by creating a frequency table of VOTE_LIBDEM. This should show that the sample estimate of \(\pi\) is 0.260, out of \(3226\) non-missing responses. Thus \(n=3226\) and \(\hat{\pi}=0.260\) in the notation of Chapter 5.
For the one-sample significance test, the value of \(\pi\) under the null hypothesis is \(\pi_{0}=0.230\). Using the specific formula of the test statistic in Section 5.5.2, the value of the test statistic \(z\) is thus given by the calculation \[z = \frac{0.260-0.230}{\sqrt{0.230\times (1-0.230)/3226}}\] Calculate this using a calculator. The result should be \(z=4.049\).
The (two-sided) \(P\)-value for this is the probability that a value from the standard normal distribution is at most \(-4.049\) or at least 4.049. Evaluate this approximately by comparing the value of \(z\) to critical values from the standard normal distribution (c.f. Table 5.2) as explained in Section 5.5.3. Here, for example, \(z\) is larger than 1.96, so the two-sided \(P\)-value must be smaller than 0.05. Convince yourself that you understand this statement.
Calculate a 95% confidence interval for the population proportion of prospective Liberal Democrat voters, using equation (15) at the end of Section 5.6.2.
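If equation (15) is the usual large-sample interval \(\hat{\pi} \pm 1.96\sqrt{\hat{\pi}(1-\hat{\pi})/n}\) (check this against Section 5.6.2), the calculation here takes the form \[0.260 \pm 1.96\sqrt{\frac{0.260\times (1-0.260)}{3226}},\] which you can evaluate with a calculator to obtain the lower and upper limits of the interval.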
What do you conclude about the proportions of prospective and actual Liberal Democrat voters? Why might the two differ from each other?
The variable TVDEBATE indicates whether the respondent reports having watched any of the three televised debates (1 for Yes, at least one watched, 0 otherwise - this includes “no” and “don’t know” responses). We will compare the proportion of people intending to vote Liberal Democrat amongst those who watched some or all of the debates with those who did not, using the two-sample methods of analysis discussed in Section 5.7. The formula of the \(z\)-test statistic for testing the hypothesis of equal population proportions is thus the two-sample \(z\)-test statistic for proportions (see middle of Section 5.7), and a confidence interval for the difference of the proportions is (25) in Section 5.7.
+Begin by calculating the relevant sample proportions. The easiest way to do this is by creating a two-way contingency table between TVDEBATE and VOTE_LIBDEM as you did in the Week 2 and 4 classes. The results required for the analysis considered here are all shown in the resulting table. Convince yourself that these show that, in the notation of Section 5.7,
+\(n_{1}=930\) and \(\hat{\pi}_{1}=0.218\; (=203/930)\),
\(n_{2}=2296\) and \(\hat{\pi}_{2}=0.277\; (=636/2296)\),
where 1 denotes respondents who did not watch any of the debates and 2 those who watched at least some. The pooled estimated proportion \(\hat{\pi}\) (formula 21 in Section 5.7) used in the test statistic (23) is here \(\hat{\pi}=0.260\), shown on the “Total” row.
Calculate the test statistic, its \(P\)-value and a 95% confidence interval for the difference in population proportions, using the relevant formulas. For example, the test statistic is here given by \[z= \frac{0.277-0.218}{\sqrt{0.260\times (1-0.260)\times (1/2296+1/930)}}.\]
What do you conclude? Is there evidence that those who watched at least some of the leaders’ debates were more likely to declare an intention to vote Liberal Democrat? If there is, how big is the difference in proportions of prospective Liberal Democrat voters between the debate-watchers and debate-non-watchers?
Write up your answers to the second class exercise. In particular, answer the following specific questions:
+What proportion of respondents say that they did watch at least some of the leaders’ debates? And what proportion did not? Of those who watched at least some of the leaders’ debates, what proportion said they intended to vote Liberal Democrat? And what proportion of those who did not watch any of the leaders’ debates said they intended to vote Liberal Democrat?
Calculate the test statistic and find its corresponding approximate \(P\)-value for the difference in population proportions of prospective Liberal Democrat voters among those who did and did not watch the leaders’ debates. Show your working. State the conclusion from the test.
Calculate a 95% confidence interval around this difference. State its lower and upper limits.
Write a brief substantive interpretation of your results.
The data set BES2010pre_lastdebate.sav contains responses to the same question - whether respondents intended to vote Liberal Democrat or not - but asked before the last of the party leaders’ debates. Repeat the analysis you carried out for the first class exercise, but using this data set. In other words carry out a one-sample analysis, of the kind done in exercise 1 above, to compare the proportion of respondents who said they intended to vote Liberal Democrat with the proportion who actually did. Answer the following questions:
+State the null hypothesis for the test.
Calculate the test statistic and find its corresponding approximate \(P\)-value. Show your workings.
Give a brief interpretation of the results. Do they differ from the other data set? Can you think of any reasons for this? (This last question invites some speculation - do not worry if you don’t have any ideas! But see the sample answer if you are interested in our speculation.)
Data set: File decathlon2012.sav.
+A scatterplot is obtained from Graphs/Legacy Dialogs/“Scatter/Dot”/ Simple Scatter/Define. The variables for the \(X\)-axis and \(Y\)-axis are placed in the X Axis and Y Axis boxes respectively. Double-clicking on the plot in the Output window opens it in a Chart Editor, where various additions to the graph can be made. A fitted straight line is added from Elements/Fit Line at Total. A least squares fitted line is the default under this option, so it is drawn immediately and you can just click Close. Closing the Chart Editor commits the changes to the Output window.
A correlation matrix is obtained from Analyze/Correlate/Bivariate, when Correlation Coefficients/Pearson is selected (which is the default, so you should not need to change it). The variables included in the correlation matrix are placed into the Variables box. The output also includes a test for the hypothesis that the population correlation is 0, but we will ignore it.
Linear regression models are obtained from Analyze/Regression/Linear. The response variable is placed under Dependent and the explanatory variable under Independent(s). The dialog box has many options for various additional choices. Today you can leave all of them at their default values, except that you should select Statistics/Regression Coefficients/Confidence intervals to include also 95% confidence intervals for the regression coefficients in the output.
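The same three procedures can also be sketched in syntax (shown here for the 100-metre variables used in the first exercise; note that the fitted line on the scatterplot still has to be added in the Chart Editor as described above):

* Scatterplot, correlation matrix and simple linear regression (illustrative sketch).
GRAPH /SCATTERPLOT(BIVAR)=MARK_100M WITH POINTS_100M.
CORRELATIONS /VARIABLES=POINTS_100M POINTS_LONGJUMP POINTS_TOTAL.
REGRESSION
  /STATISTICS COEFF OUTS CI(95) R ANOVA
  /DEPENDENT POINTS_TOTAL
  /METHOD=ENTER POINTS_100M.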
Decathlon is a sport where the participants complete ten different athletics events over two days. Their results in each are then translated into points, and the winner is the competitor with the highest points total for the ten events. The file decathlon2012.sav contains the results of the decathlon competition at the 2012 Olympics in London for the 26 athletes who finished the competition.64 The results for each event are given both in their original units (variables with names beginning with “mark_”) and in decathlon points (names beginning with “points_”). The ten events are identified by the variable labels in Variable View. The variable points_total gives the final points total for each competitor.
+Create a scatterplot between the result (\(X\)-axis) and points (\(Y\)-axis) for one event, the 100-metre sprint (variables MARK_100M and POINTS_100M), and add a fitted line. This simply provides information on the calculation used to transform the result into points. Clearly a linear calculation is used for this, at least over the range of results in these data. Notice the downward slope of the line: the faster the result, the higher the number of points. From now on, for simplicity we will consider only the points variables for each event.
Obtain the correlation matrix for all pairs of variables among the ten individual points scores and the total score. Consider first correlations between the individual events only. Which correlations tend to be high (say over 0.5), which ones close to zero and which ones even negative? Can you think of any reasons for this? Draw scatterplots and fitted lines for a few pairs of variables with different sizes of correlations (here the variables are treated symmetrically, so it does not matter which one is placed on the \(X\)-axis). Can these associations be reasonably described as linear?
Consider now the correlations between the ten event scores and the final score POINTS_TOTAL. Which of them is highest, and which one lowest? Examine the scatterplot and fitted line between points for 100 metres (POINTS_100M) and the total score (POINTS_TOTAL). Fit a linear regression model to these variables, with POINTS_100M as the explanatory variable. Interpret the results. Does there appear to be an association between the points for 100 metres and the total score? What is the nature of the association?
+Suppose you were told that a competitor received 800 points (a time of about 11.3 seconds) for 100 metres, the first event of the decathlon. Based on the fitted model, what final points score would you predict for him? You can calculate this fitted value with a pocket calculator. What would be the predicted value if the 100-metre score was 950 points (about 10.6 s) instead?
HOMEWORK
+Briefly discuss the correlation matrix produced in the class. Pick out a few examples for illustration - which correlations are highest, and which ones lowest, and which ones negative? You may comment on correlations between individual events, as well as on correlations between the final score and individual events.
Obtain the scatterplot and linear regression model for the total score given points for the long jump, one of the field events (POINTS_LONGJUMP). Is the score for long jump strongly or weakly associated with the final score? Interpret the slope coefficient. Suppose you were told that a competitor received 900 points (a jump of about 7.4 metres) for the long jump. Based on the fitted model, what final points score would you predict for him?
Obtain the scatterplot and linear regression model for the total score given points for throwing the discus, another of the field events (POINTS_DISCUS). Interpret the slope coefficient. Is the score for discus strongly or weakly associated with the final score?
Data set: File GSS2010.SAV. This contains a selection of variables on attitudes and demographic characteristics for 2044 respondents in the 2010 U.S. General Social Survey (GSS).65 Only a few of the variables are used in the exercises.
+Here we will focus on the variables EDUC, PAEDUC, MAEDUC and SPEDUC. These show the number of years of education completed by, respectively, the survey respondent him/herself, and the respondent’s father, mother and spouse.
+Obtain basic descriptive statistics for the variables. Here they can be compared directly, because the meaning of the variable is similar in each case. We can even draw side-by-side box plots for the variables (rather than for values of a single variable at different levels of another, as before). These can be obtained from Analyze/Descriptive Statistics/Explore by placing all the variables under Dependent List and selecting Plots/Boxplots/Dependents together. You should then also select Options/Missing Values/Exclude cases pairwise to include all non-missing values for each variable (here SPEDUC has for obvious reasons more missing values than the others).
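A syntax sketch of the same request, using the four variables named above:

* Descriptive statistics and side-by-side box plots for the education variables, with pairwise missing values (sketch).
EXAMINE VARIABLES=EDUC PAEDUC MAEDUC SPEDUC
  /PLOT=BOXPLOT
  /COMPARE=VARIABLES
  /MISSING=PAIRWISE
  /STATISTICS=DESCRIPTIVES.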
Obtain the correlation matrix of the four variables. Which correlations are highest, and which ones lowest?
Draw a scatterplot with fitted line for EDUC given PAEDUC. Fit a linear regression model between these variables, regressing EDUC (response variable) on PAEDUC (explanatory variable). Interpret the results. Is there a statistically significant linear association between a person’s years of schooling and those of his/her father? Interpret the estimated regression coefficient, \(t\)-statistic and \(P\)-value, and 95 per cent confidence interval.
Based on the fitted model, what is the predicted number of years of education for a respondent whose father completed 12 years of education?
The homework exercise uses the same data set for two different types of analysis.
Draw a scatterplot with fitted line for EDUC given MAEDUC. Fit a linear regression model between these variables, regressing EDUC (response variable) on MAEDUC (explanatory variable).
+Interpret the results: Is there a statistically significant linear association between a person’s years of schooling and those of his/her mother? Interpret the estimated regression coefficient, \(t\)-statistic and \(P\)-value, and 95 per cent confidence interval.
Based on the fitted model, what is the predicted number of years of education for a respondent whose mother completed 10 years of education?
Interpret the R-squared statistic for the model.
Three-way contingency tables are again obtained from Analyze/Descriptive Statistics/Crosstabs. The only change from Week 4 class is that the conditioning variable is now placed in the Layer 1 of 1 box. This produces a series of partial two-way tables between the row and column variables specified in the Row(s) and Column(s) boxes, one for each category of the Layer variable. Percentages and \(\chi^{2}\) test are similarly calculated separately for each partial table. For this example we elaborate on the first two exercises from Week 4 class. To remind you, the categorical variables we are analysing are these:
+The respondent’s sex, recorded as the variable SEX.
The respondent’s age, recorded as AGEGROUP in three groups: 18-34, 35-54 and 55 or over.
FEFAM: Level of agreement with the following statement: “It is much better for everyone involved if the man is the achiever outside the home and the woman takes care of the home and family”, with response options Strongly agree, Agree, Disagree, and Strongly disagree.
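In syntax, the layered table described above is requested by adding the conditioning variable with a second BY; a sketch for the three-way table used in the second exercise below:

* Partial tables of SEX by FEFAM within each category of AGEGROUP, with row percentages and chi-square tests (sketch).
CROSSTABS /TABLES=SEX BY FEFAM BY AGEGROUP
  /CELLS=COUNT ROW
  /STATISTICS=CHISQ.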
First remind yourself of the associations between SEX and FEFAM and between AGEGROUP and FEFAM. Obtain the two-way contingency table between FEFAM and SEX, including appropriate percentages and \(\chi^{2}\) test of independence. Repeat the procedure for FEFAM by AGEGROUP. What do you learn about the associations between attitude and sex, and between attitude and age?
Sociologists would suggest that the relationship between sex and attitude towards male and female work roles might be different for different age groups. In other words, age might modify the association between sex and attitude. Investigate this possible interaction between the three variables. Create a three-way table where FEFAM is the column variable, SEX the row variable and AGEGROUP the layer (conditioning) variable. Study the SPSS output, and make sure you understand how this shows three partial tables of FEFAM vs. SEX, one for each possible value of AGEGROUP. Examine and interpret the associations in the three partial tables. State the results of the \(\chi^{2}\) test for each partial table, and illustrate your interpretations with some appropriate percentages. Finally, summarise your findings: are there differences in the nature, strength or significance of the association between sex and attitude, depending on the age group? Comment on how this interpretation differs from the initial two-way table of FEFAM and SEX.
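A syntax sketch of the same three-way table (the third variable after BY plays the role of the layer; drop it to reproduce the two-way tables requested above):

CROSSTABS /TABLES=SEX BY FEFAM BY AGEGROUP
  /CELLS=COUNT ROW
  /STATISTICS=CHISQ.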
Data set: File humandevelopment2011.sav.
+Multiple linear regression is obtained from Analyze/Regression/Linear, by placing all of the required explanatory variables in the Independent(s) box. No other changes from last week are required.
To include categorical explanatory variables, the necessary dummy variables have to be created first. The ones for today’s class are already included in the data set. If you need to create dummy variables for your own analyses in the future, it is usually easiest to do so from Transform/Compute Variable. Some of the buttons on the keypad shown in that dialog box are logical operators for defining conditions for which the outcome is either 1 (True) or 0 (False), as required by a dummy variable. For example, the categorical variable INCOME_GROUP in today’s data set has the value 3 if the country is in the high income group. The dummy variable HIGH_INCOME was created from this by entering Target Variable: HIGH_INCOME and Numeric Expression: INCOME_GROUP=3. This means that the new variable HIGH_INCOME will have the value 1 for countries for which INCOME_GROUP is equal to 3, and will be 0 otherwise. Other logical operators may also be used: for example, URBAN_POP<50 would produce 1 if the variable URBAN_POP was less than 50 and 0 otherwise.
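In syntax, the same dummy variable could be created with a single command (a sketch using the variable names mentioned above; the logical expression in parentheses evaluates to 1 when it is true and 0 when it is false):

COMPUTE HIGH_INCOME = (INCOME_GROUP = 3).
EXECUTE.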
The file humandevelopment2011.sav contains data on a number of indicators of what might broadly be called development, for 194 countries in 2011. These were collated from two international data agency sources.66 The response variable considered today is SCHOOL_YEARS, which records for each country the mean number of years of schooling taken by the adult population. We treat it here as a general indicator of the educational situation in a country, which is an important aspect of development. We will consider the following explanatory variables for it:
URBAN_POP: the degree of urbanisation of the country, specifically the percentage of the country’s population living in urban areas.
GOVERNANCE, a continuous variable constructed from expert opinion surveys to reflect the perceived effectiveness of government in delivering services.
INFANT_MORTALITY, number of infants dying before 1 year old, per 1,000 live births — a “proxy” indicator representing the health of the population
INCOME_GROUP, classified as low, middle or high income economies. This is also provided in the form of three dummy variables: LOW_INCOME, MIDDLE_INCOME and HIGH_INCOME.
Obtain some descriptive statistics for the continuous variables, to gain an impression of their ranges. A quick way of doing this is via Analyze/Descriptive Statistics/Frequencies, unchecking the “Display frequency tables” option and requesting minimum and maximum values.
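An equivalent syntax sketch (FORMAT=NOTABLE suppresses the frequency tables themselves):

FREQUENCIES VARIABLES=SCHOOL_YEARS URBAN_POP GOVERNANCE INFANT_MORTALITY
  /FORMAT=NOTABLE
  /STATISTICS=MINIMUM MAXIMUM.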
Investigate the idea that increased urbanisation is linked to greater availability of schooling for people. Obtain a scatterplot and a simple linear regression model for SCHOOL_YEARS given URBAN_POP. What do you observe in the scatterplot? Interpret the regression output.
Now consider the possibility that schooling may also be explained by the effectiveness of governments in providing public services (such as education). Fit a multiple linear regression model for SCHOOL_YEARS given both URBAN_POP and GOVERNANCE. Compare the estimated coefficient of URBAN_POP for this model with the coefficient of the same variable in the model in Question 2. What do you conclude? Does the association between schooling and urbanisation change when we control for government effectiveness? If so, in what way? Interpret the estimated coefficient of GOVERNANCE in the fitted model, the results of its \(t\)-test and its 95% confidence interval.
Next consider the possible explanatory value of the income level of a country for understanding variation in schooling years. Include income by entering two of the three dummy variables for income group. For the most convenient interpretation, we suggest that you leave “low income” as the reference group, and enter the dummies for MIDDLE_INCOME and HIGH_INCOME in the model. Interpret the values of the estimated regression coefficients for the two income dummy variables. In addition, for each one state the null hypothesis for its \(t\)-test, and interpret the result of the test and the 95% confidence interval.
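A syntax sketch for this model, with low income left as the reference group:

REGRESSION /STATISTICS=COEFF CI(95) R ANOVA
  /DEPENDENT SCHOOL_YEARS
  /METHOD=ENTER URBAN_POP GOVERNANCE MIDDLE_INCOME HIGH_INCOME.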
Using this model, what level of schooling would you predict for a country with 70% urban population, a score of 1.5 on governance, and a high income economy?
Using this model, what level of schooling would you predict for a country with 30% urban population, a score of -0.2 on governance, and a low income economy?
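For both predictions you simply substitute the stated values into the fitted equation. Writing the estimated coefficients as \(\hat{\alpha}\) (constant), \(\hat{\beta}_{1}\) for URBAN_POP, \(\hat{\beta}_{2}\) for GOVERNANCE, \(\hat{\beta}_{3}\) for MIDDLE_INCOME and \(\hat{\beta}_{4}\) for HIGH_INCOME, the first prediction is \(\hat{\alpha} + 70\hat{\beta}_{1} + 1.5\hat{\beta}_{2} + \hat{\beta}_{4}\) (MIDDLE_INCOME is 0 and HIGH_INCOME is 1), and the second is \(\hat{\alpha} + 30\hat{\beta}_{1} - 0.2\hat{\beta}_{2}\) (both dummies are 0, since low income is the reference group). The numerical values of the coefficients come from your own SPSS output.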
HOMEWORK
+Write up your answers to the last three questions in the class exercise.
Finally, consider one more possible explanatory variable: INFANT_MORTALITY. Add this variable to the multiple linear regression model fitted above. Is it statistically significant, at the 1% level of significance? Interpret the value of its estimated coefficient, and its 95% confidence interval. Take care to make sense of the sign (positive or negative) of the coefficient.
Has the inclusion of INFANT_MORTALITY modified the interpretation of any of the other explanatory variables in the model? Are they all statistically significant, at the 5% level of significance? Briefly outline the similarities and differences between the results for this final model and the model fitted in the class exercise.
Data set: File ESS5GB_trust.sav.
+This class is for you to revisit any topics of your choosing. Make the most of the opportunity to ask your class teachers any questions you have about any of the course material, and to practise any of the analyses you have learned during the course.
+As an optional exercise, the data file ESS5GB_trust.sav is provided. This contains a selection of variables from the survey of British respondents that forms the 2010 wave of the European Social Survey.67
+We suggest that you use the data to practise multiple linear regression modelling on one or more of the variables capturing people’s levels of trust in institutions. For these questions, respondents were asked the following: “Using this card, please tell me on a score of 0-10 how much you personally trust each of the institutions I read out. 0 means you do not trust an institution at all, and 10 means you have complete trust.” The institutions (and their variable names) are:
+trstprl: Trust in country’s parliament
trstlgl: Trust in the legal system
trstplc: Trust in the police
trstplt: Trust in politicians
trstprt: Trust in political parties
trstep: Trust in the European Parliament
trstun: Trust in the United Nations
After you choose a response variable that interests you, you will need to select some potential explanatory variables to test. The data set contains a number of variables. Some are socio-demographic, such as age and gender. Some are attitudinal or behavioural, such as amount of time spent reading newspapers. You will need to make a judgement about the levels of measurement of the variables, and how to enter them into the model. Use the “Values” column in the SPSS Variable View to check how each variable is coded. Note: we suggest that it is not too much of a compromise to treat the variables on television, radio and newspaper consumption as continuous, interval level variables. Note also: we have provided dummy variables for the categorical variables in the data set.
+HOMEWORK
+As this is the last week of the course, there is no homework. You can find further information on this and the other class exercises and homeworks in the model answers, which will be posted in the Moodle site.
Explanation of the “Table of standard normal tail probabilities” below:
+The table shows, for values of \(Z\) between 0 and 3.5, the probability that a value from the standard normal distribution is larger than \(Z\) (i.e. the “right-hand” tail probabilities).
+For negative values of \(Z\), the probability of values smaller than \(Z\) (the “left-hand” tail probability) is equal to the right-hand tail probability for the corresponding positive value of \(Z\).
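For example, the table gives the probability 0.0250 for \(z=1.96\), so \(P(Z>1.96)=0.025\) and, by the symmetry just described, \(P(Z<-1.96)=0.025\) as well; together these imply that 95% of the standard normal distribution lies between \(-1.96\) and \(1.96\).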
\(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. |
---|---|---|---|---|---|---|---|---|---|---|---|
0.00 | +0.5000 | +0.50 | +0.3085 | +1.00 | +0.1587 | +1.50 | +0.0668 | +2.00 | +0.0228 | +2.50 | +0.0062 | +
0.01 | +0.4960 | +0.51 | +0.3050 | +1.01 | +0.1562 | +1.51 | +0.0655 | +2.01 | +0.0222 | +2.52 | +0.0059 | +
0.02 | +0.4920 | +0.52 | +0.3015 | +1.02 | +0.1539 | +1.52 | +0.0643 | +2.02 | +0.0217 | +2.54 | +0.0055 | +
0.03 | +0.4880 | +0.53 | +0.2981 | +1.03 | +0.1515 | +1.53 | +0.0630 | +2.03 | +0.0212 | +2.56 | +0.0052 | +
0.04 | +0.4840 | +0.54 | +0.2946 | +1.04 | +0.1492 | +1.54 | +0.0618 | +2.04 | +0.0207 | +2.58 | +0.0049 | +
0.05 | +0.4801 | +0.55 | +0.2912 | +1.05 | +0.1469 | +1.55 | +0.0606 | +2.05 | +0.0202 | +2.60 | +0.0047 | +
0.06 | +0.4761 | +0.56 | +0.2877 | +1.06 | +0.1446 | +1.56 | +0.0594 | +2.06 | +0.0197 | +2.62 | +0.0044 | +
0.07 | +0.4721 | +0.57 | +0.2843 | +1.07 | +0.1423 | +1.57 | +0.0582 | +2.07 | +0.0192 | +2.64 | +0.0041 | +
0.08 | +0.4681 | +0.58 | +0.2810 | +1.08 | +0.1401 | +1.58 | +0.0571 | +2.08 | +0.0188 | +2.66 | +0.0039 | +
0.09 | +0.4641 | +0.59 | +0.2776 | +1.09 | +0.1379 | +1.59 | +0.0559 | +2.09 | +0.0183 | +2.68 | +0.0037 | +
0.10 | +0.4602 | +0.60 | +0.2743 | +1.10 | +0.1357 | +1.60 | +0.0548 | +2.10 | +0.0179 | +2.70 | +0.0035 | +
0.11 | +0.4562 | +0.61 | +0.2709 | +1.11 | +0.1335 | +1.61 | +0.0537 | +2.11 | +0.0174 | +2.72 | +0.0033 | +
0.12 | +0.4522 | +0.62 | +0.2676 | +1.12 | +0.1314 | +1.62 | +0.0526 | +2.12 | +0.0170 | +2.74 | +0.0031 | +
0.13 | +0.4483 | +0.63 | +0.2643 | +1.13 | +0.1292 | +1.63 | +0.0516 | +2.13 | +0.0166 | +2.76 | +0.0029 | +
0.14 | +0.4443 | +0.64 | +0.2611 | +1.14 | +0.1271 | +1.64 | +0.0505 | +2.14 | +0.0162 | +2.78 | +0.0027 | +
0.15 | +0.4404 | +0.65 | +0.2578 | +1.15 | +0.1251 | +1.65 | +0.0495 | +2.15 | +0.0158 | +2.80 | +0.0026 | +
0.16 | +0.4364 | +0.66 | +0.2546 | +1.16 | +0.1230 | +1.66 | +0.0485 | +2.16 | +0.0154 | +2.82 | +0.0024 | +
0.17 | +0.4325 | +0.67 | +0.2514 | +1.17 | +0.1210 | +1.67 | +0.0475 | +2.17 | +0.0150 | +2.84 | +0.0023 | +
0.18 | +0.4286 | +0.68 | +0.2483 | +1.18 | +0.1190 | +1.68 | +0.0465 | +2.18 | +0.0146 | +2.86 | +0.0021 | +
0.19 | +0.4247 | +0.69 | +0.2451 | +1.19 | +0.1170 | +1.69 | +0.0455 | +2.19 | +0.0143 | +2.88 | +0.0020 | +
0.20 | +0.4207 | +0.70 | +0.2420 | +1.20 | +0.1151 | +1.70 | +0.0446 | +2.20 | +0.0139 | +2.90 | +0.0019 | +
0.21 | +0.4168 | +0.71 | +0.2389 | +1.21 | +0.1131 | +1.71 | +0.0436 | +2.21 | +0.0136 | +2.92 | +0.0018 | +
0.22 | +0.4129 | +0.72 | +0.2358 | +1.22 | +0.1112 | +1.72 | +0.0427 | +2.22 | +0.0132 | +2.94 | +0.0016 | +
0.23 | +0.4090 | +0.73 | +0.2327 | +1.23 | +0.1093 | +1.73 | +0.0418 | +2.23 | +0.0129 | +2.96 | +0.0015 | +
0.24 | +0.4052 | +0.74 | +0.2296 | +1.24 | +0.1075 | +1.74 | +0.0409 | +2.24 | +0.0125 | +2.98 | +0.0014 | +
0.25 | +0.4013 | +0.75 | +0.2266 | +1.25 | +0.1056 | +1.75 | +0.0401 | +2.25 | +0.0122 | +3.00 | +0.0013 | +
0.26 | +0.3974 | +0.76 | +0.2236 | +1.26 | +0.1038 | +1.76 | +0.0392 | +2.26 | +0.0119 | +3.02 | +0.0013 | +
0.27 | +0.3936 | +0.77 | +0.2206 | +1.27 | +0.1020 | +1.77 | +0.0384 | +2.27 | +0.0116 | +3.04 | +0.0012 | +
0.28 | +0.3897 | +0.78 | +0.2177 | +1.28 | +0.1003 | +1.78 | +0.0375 | +2.28 | +0.0113 | +3.06 | +0.0011 | +
0.29 | +0.3859 | +0.79 | +0.2148 | +1.29 | +0.0985 | +1.79 | +0.0367 | +2.29 | +0.0110 | +3.08 | +0.0010 | +
0.30 | +0.3821 | +0.80 | +0.2119 | +1.30 | +0.0968 | +1.80 | +0.0359 | +2.30 | +0.0107 | +3.10 | +0.0010 | +
0.31 | +0.3783 | +0.81 | +0.2090 | +1.31 | +0.0951 | +1.81 | +0.0351 | +2.31 | +0.0104 | +3.12 | +0.0009 | +
0.32 | +0.3745 | +0.82 | +0.2061 | +1.32 | +0.0934 | +1.82 | +0.0344 | +2.32 | +0.0102 | +3.14 | +0.0008 | +
0.33 | +0.3707 | +0.83 | +0.2033 | +1.33 | +0.0918 | +1.83 | +0.0336 | +2.33 | +0.0099 | +3.16 | +0.0008 | +
0.34 | +0.3669 | +0.84 | +0.2005 | +1.34 | +0.0901 | +1.84 | +0.0329 | +2.34 | +0.0096 | +3.18 | +0.0007 | +
0.35 | +0.3632 | +0.85 | +0.1977 | +1.35 | +0.0885 | +1.85 | +0.0322 | +2.35 | +0.0094 | +3.20 | +0.0007 | +
0.36 | +0.3594 | +0.86 | +0.1949 | +1.36 | +0.0869 | +1.86 | +0.0314 | +2.36 | +0.0091 | +3.22 | +0.0006 | +
0.37 | +0.3557 | +0.87 | +0.1922 | +1.37 | +0.0853 | +1.87 | +0.0307 | +2.37 | +0.0089 | +3.24 | +0.0006 | +
0.38 | +0.3520 | +0.88 | +0.1894 | +1.38 | +0.0838 | +1.88 | +0.0301 | +2.38 | +0.0087 | +3.26 | +0.0006 | +
0.39 | +0.3483 | +0.89 | +0.1867 | +1.39 | +0.0823 | +1.89 | +0.0294 | +2.39 | +0.0084 | +3.28 | +0.0005 | +
0.40 | +0.3446 | +0.90 | +0.1841 | +1.40 | +0.0808 | +1.90 | +0.0287 | +2.40 | +0.0082 | +3.30 | +0.0005 | +
0.41 | +0.3409 | +0.91 | +0.1814 | +1.41 | +0.0793 | +1.91 | +0.0281 | +2.41 | +0.0080 | +3.32 | +0.0005 | +
0.42 | +0.3372 | +0.92 | +0.1788 | +1.42 | +0.0778 | +1.92 | +0.0274 | +2.42 | +0.0078 | +3.34 | +0.0004 | +
0.43 | +0.3336 | +0.93 | +0.1762 | +1.43 | +0.0764 | +1.93 | +0.0268 | +2.43 | +0.0075 | +3.36 | +0.0004 | +
0.44 | +0.3300 | +0.94 | +0.1736 | +1.44 | +0.0749 | +1.94 | +0.0262 | +2.44 | +0.0073 | +3.38 | +0.0004 | +
0.45 | +0.3264 | +0.95 | +0.1711 | +1.45 | +0.0735 | +1.95 | +0.0256 | +2.45 | +0.0071 | +3.40 | +0.0003 | +
0.46 | +0.3228 | +0.96 | +0.1685 | +1.46 | +0.0721 | +1.96 | +0.0250 | +2.46 | +0.0069 | +3.42 | +0.0003 | +
0.47 | +0.3192 | +0.97 | +0.1660 | +1.47 | +0.0708 | +1.97 | +0.0244 | +2.47 | +0.0068 | +3.44 | +0.0003 | +
0.48 | +0.3156 | +0.98 | +0.1635 | +1.48 | +0.0694 | +1.98 | +0.0239 | +2.48 | +0.0066 | +3.46 | +0.0003 | +
0.49 | +0.3121 | +0.99 | +0.1611 | +1.49 | +0.0681 | +1.99 | +0.0233 | +2.49 | +0.0064 | +3.48 | +0.0003 | +
Table of critical values for \(t\)-distributions:

df | 0.100 | 0.050 | 0.025 | 0.010 | 0.005 | 0.001 | 0.0005 |
---|---|---|---|---|---|---|---|
1 | +3.078 | +6.314 | +12.706 | +31.821 | +63.657 | +318.309 | +636.619 | +
2 | +1.886 | +2.920 | +4.303 | +6.965 | +9.925 | +22.327 | +31.599 | +
3 | +1.638 | +2.353 | +3.182 | +4.541 | +5.841 | +10.215 | +12.924 | +
4 | +1.533 | +2.132 | +2.776 | +3.747 | +4.604 | +7.173 | +8.610 | +
5 | +1.476 | +2.015 | +2.571 | +3.365 | +4.032 | +5.893 | +6.869 | +
6 | +1.440 | +1.943 | +2.447 | +3.143 | +3.707 | +5.208 | +5.959 | +
7 | +1.415 | +1.895 | +2.365 | +2.998 | +3.499 | +4.785 | +5.408 | +
8 | +1.397 | +1.860 | +2.306 | +2.896 | +3.355 | +4.501 | +5.041 | +
9 | +1.383 | +1.833 | +2.262 | +2.821 | +3.250 | +4.297 | +4.781 | +
10 | +1.372 | +1.812 | +2.228 | +2.764 | +3.169 | +4.144 | +4.587 | +
11 | +1.363 | +1.796 | +2.201 | +2.718 | +3.106 | +4.025 | +4.437 | +
12 | +1.356 | +1.782 | +2.179 | +2.681 | +3.055 | +3.930 | +4.318 | +
13 | +1.350 | +1.771 | +2.160 | +2.650 | +3.012 | +3.852 | +4.221 | +
14 | +1.345 | +1.761 | +2.145 | +2.624 | +2.977 | +3.787 | +4.140 | +
15 | +1.341 | +1.753 | +2.131 | +2.602 | +2.947 | +3.733 | +4.073 | +
16 | +1.337 | +1.746 | +2.120 | +2.583 | +2.921 | +3.686 | +4.015 | +
17 | +1.333 | +1.740 | +2.110 | +2.567 | +2.898 | +3.646 | +3.965 | +
18 | +1.330 | +1.734 | +2.101 | +2.552 | +2.878 | +3.610 | +3.922 | +
19 | +1.328 | +1.729 | +2.093 | +2.539 | +2.861 | +3.579 | +3.883 | +
20 | +1.325 | +1.725 | +2.086 | +2.528 | +2.845 | +3.552 | +3.850 | +
21 | +1.323 | +1.721 | +2.080 | +2.518 | +2.831 | +3.527 | +3.819 | +
22 | +1.321 | +1.717 | +2.074 | +2.508 | +2.819 | +3.505 | +3.792 | +
23 | +1.319 | +1.714 | +2.069 | +2.500 | +2.807 | +3.485 | +3.768 | +
24 | +1.318 | +1.711 | +2.064 | +2.492 | +2.797 | +3.467 | +3.745 | +
25 | +1.316 | +1.708 | +2.060 | +2.485 | +2.787 | +3.450 | +3.725 | +
26 | +1.315 | +1.706 | +2.056 | +2.479 | +2.779 | +3.435 | +3.707 | +
27 | +1.314 | +1.703 | +2.052 | +2.473 | +2.771 | +3.421 | +3.690 | +
28 | +1.313 | +1.701 | +2.048 | +2.467 | +2.763 | +3.408 | +3.674 | +
29 | +1.311 | +1.699 | +2.045 | +2.462 | +2.756 | +3.396 | +3.659 | +
30 | +1.310 | +1.697 | +2.042 | +2.457 | +2.750 | +3.385 | +3.646 | +
40 | +1.303 | +1.684 | +2.021 | +2.423 | +2.704 | +3.307 | +3.551 | +
60 | +1.296 | +1.671 | +2.000 | +2.390 | +2.660 | +3.232 | +3.460 | +
120 | +1.289 | +1.658 | +1.980 | +2.358 | +2.617 | +3.160 | +3.373 | +
\(\infty\) | +1.282 | +1.645 | +1.960 | +2.326 | +2.576 | +3.090 | +3.291 | +
Explanation: For example, the value 3.078 in the top left corner indicates that for a \(t\)-distribution with 1 degree of freedom the probability of values greater than 3.078 is 0.100. The last row shows critical values for the standard normal distribution.
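For instance, to obtain the critical value for a 95% confidence interval based on a \(t\)-distribution with 30 degrees of freedom, read the 0.025 column (leaving probability 0.025 in each tail) at df \(=30\), which gives 2.042; as the degrees of freedom increase this approaches the standard normal value 1.960 shown in the last row.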
Table of critical values for \(\chi^{2}\) distributions:

df | 0.100 | 0.050 | 0.010 | 0.001 |
---|---|---|---|---|
1 | +2.71 | +3.84 | +6.63 | +10.828 | +
2 | +4.61 | +5.99 | +9.21 | +13.816 | +
3 | +6.25 | +7.81 | +11.34 | +16.266 | +
4 | +7.78 | +9.49 | +13.28 | +18.467 | +
5 | +9.24 | +11.07 | +15.09 | +20.515 | +
6 | +10.64 | +12.59 | +16.81 | +22.458 | +
7 | +12.02 | +14.07 | +18.48 | +24.322 | +
8 | +13.36 | +15.51 | +20.09 | +26.124 | +
9 | +14.68 | +16.92 | +21.67 | +27.877 | +
10 | +15.99 | +18.31 | +23.21 | +29.588 | +
11 | +17.28 | +19.68 | +24.72 | +31.264 | +
12 | +18.55 | +21.03 | +26.22 | +32.909 | +
13 | +19.81 | +22.36 | +27.69 | +34.528 | +
14 | +21.06 | +23.68 | +29.14 | +36.123 | +
15 | +22.31 | +25.00 | +30.58 | +37.697 | +
16 | +23.54 | +26.30 | +32.00 | +39.252 | +
17 | +24.77 | +27.59 | +33.41 | +40.790 | +
18 | +25.99 | +28.87 | +34.81 | +42.312 | +
19 | +27.20 | +30.14 | +36.19 | +43.820 | +
20 | +28.41 | +31.41 | +37.57 | +45.315 | +
25 | +34.38 | +37.65 | +44.31 | +52.620 | +
30 | +40.26 | +43.77 | +50.89 | +59.703 | +
40 | +51.81 | +55.76 | +63.69 | +73.402 | +
50 | +63.17 | +67.50 | +76.15 | +86.661 | +
60 | +74.40 | +79.08 | +88.38 | +99.607 | +
70 | +85.53 | +90.53 | +100.43 | +112.317 | +
80 | +96.58 | +101.88 | +112.33 | +124.839 | +
90 | +107.57 | +113.15 | +124.12 | +137.208 | +
100 | +118.50 | +124.34 | +135.81 | +149.449 | +
Explanation: For example, the value 2.71 in the top left corner indicates that for a \(\chi^{2}\) distribution with 1 degree of freedom the probability of values greater than 2.71 is 0.100.
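As a further illustration: a two-way table with 4 rows and 2 columns gives a test with \((4-1)\times(2-1)=3\) degrees of freedom, so at the 5% level of significance the observed \(\chi^{2}\) statistic is compared with the critical value 7.81 from the table.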
+ +ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway - Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
The data can be obtained from http://www3.norc.org/gss+website/, which gives further information on the survey, including the full text of the questionnaires.↩
ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway - Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
Strictly speaking, the analysis should incorporate sampling weights (variable DWEIGHT) to adjust for different sampling probabilities for different types of respondents. Here the weights are ignored. Using them would not change the main conclusions for these variables.↩
The data can be obtained from http://bes2009-10.org/, which gives further information on the survey, including the full text of the questionnaires. The data analysed in this class and homework are from the BES Campaign Internet Panel Survey, which has been divided into two data sets corresponding to two time periods leading up to the General Election.↩
Official results obtained from http://www.olympic.org/london-2012-summer-olympics.↩
The data can be obtained from http://www3.norc.org/GSS+Website/, which gives further information on the survey, including the full text of the questionnaires.↩
United Nations Development Programme International Human Development Indicators, http://hdr.undp.org/en/data/; World Bank Worldwide Governance Indicators, http://info.worldbank.org/governance/wgi/pdf/wgidataset.xlsx; World Bank World Development Indicators, http://data.worldbank.org/indicator/SP.DYN.IMRT.IN.↩
ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway - Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
In Section 2.4 and Chapter 4 we discussed +the analysis of two-way contingency tables (crosstabulations) for +examining the associations between two categorical variables. In this +section we extend this by introducing the basic ideas of multiway +contingency tables which include more than two categorical variables. +We focus solely on the simplest instance of them, a three-way table +of three variables.
+This topic is thematically related also to some of Chapter +8, in that a multiway contingency table can be seen as +a way of implementing for categorical variables the ideas of statistical +control that were also a feature of the multiple linear regression model +of Section 8.5. Here, however, we will not +consider formal regression models for categorical variables (these are +mentioned only briefly at the end of the chapter). Instead, we give +examples of analyses which simply apply familiar methods for two-way +tables repeatedly for tables of two variables at fixed values of a third +variable.
The discussion is organised around three examples. In each case we start with a two-way table, and then introduce a third variable which we want to control for. This reveals various features in the examples, to illustrate the types of findings that may be uncovered by statistical control.
Example 9.1: Berkeley admissions
Table 9.1 summarises data on applications for admission to graduate study at the University of California, Berkeley, for the fall quarter 1973.55 The data are for five of the six departments with the largest number of applications, labelled below Departments 2–6 (Department 1 will be discussed at the end of this section). Table 9.1 shows the two-way contingency table of the sex of the applicant and whether he or she was admitted to the university.
Sex | Admitted: No | Admitted: Yes | % Yes | Total |
---|---|---|---|---|
Male | 1180 | 686 | 36.8 | 1866 |
Female | 1259 | 468 | 27.1 | 1727 |
Total | 2439 | 1154 | 32.1 | 3593 |
The percentages in Table 9.1 show that men were more +likely to be admitted, with a 36.8% success rate compared to 27.1% for +women. The difference is strongly significant, with \(P<0.001\) for the +\(\chi^{2}\) test of independence. If this association was interpreted +causally, it might be regarded as evidence of sex bias in the admissions +process. However, other important variables may also need to be +considered in the analysis. One of them is the academic department to +which an applicant had applied. Information on the department as well as +sex and admission is shown in Table 9.2.
Department | Sex | Admitted: No | Admitted: Yes | % Yes | Total |
---|---|---|---|---|---|
2 | Male | 207 | 353 | 63.0 | 560 |
| Female | 8 | 17 | 68.0 | 25 |
| Total | 215 | 370 | 63.2 | 585 |
\(\chi^{2}=0.25\), \(P=0.61\) | | | | | |
3 | Male | 205 | 120 | 36.9 | 325 |
| Female | 391 | 202 | 34.1 | 593 |
| Total | 596 | 322 | 35.1 | 918 |
\(\chi^{2}=0.75\), \(P=0.39\) | | | | | |
4 | Male | 279 | 138 | 33.1 | 417 |
| Female | 244 | 131 | 34.9 | 375 |
| Total | 523 | 269 | 34.0 | 792 |
\(\chi^{2}=0.30\), \(P=0.59\) | | | | | |
5 | Male | 138 | 53 | 27.7 | 191 |
| Female | 299 | 94 | 23.9 | 393 |
| Total | 437 | 147 | 25.2 | 584 |
\(\chi^{2}=1.00\), \(P=0.32\) | | | | | |
6 | Male | 351 | 22 | 5.9 | 373 |
| Female | 317 | 24 | 7.0 | 341 |
| Total | 668 | 46 | 6.4 | 714 |
\(\chi^{2}=0.38\), \(P=0.54\) | | | | | |
Total | | 2439 | 1154 | 32.1 | 3593 |
Table 9.2 is a three-way contingency table, because each +of its internal cells shows the number of applicants with a particular +combination of three variables: department, sex and admission status. +For example, the frequency 207 in the top left corner indicates that +there were 207 male applicants to department 2 who were not admitted. +Table 9.2 is presented in the form of a series of tables +of sex vs. admission, just like in the original two-way table +9.1, but now with one table for each department. These are +known as partial tables of sex vs. admission, controlling for +department. The word “control” is used here in the same sense as before: +each partial table summarises the data for the applicants to a single +department, so the variable “department” is literally held constant +within the partial tables.
+Table 9.2 also contains the marginal distributions of sex +and admission status within each department. They can be used to +construct the other two possible two-way tables for these variables, for +department vs. sex of applicant and department vs. admission status. +This information, summarised in Table 9.3, is discussed +further below.
+The association between sex and admission within each partial table can +be examined using methods for two-way tables. For every one of them, the +\(\chi^{2}\) test shows that the hypothesis of independence cannot be +rejected, so there is no evidence of sex bias within any department. The +apparent association in Table 9.1 is thus spurious, and +disappears when we control for department. Why this happens can be +understood by considering the distributions of sex and admissions across +departments, as shown in Table 9.3. Department is clearly +associated with sex of the applicant: for example, almost all of the +applicants to department 2, but only a third of the applicants to +department 5 are men. Similarly, there is an association between +department and admission: for example, nearly two thirds of the +applicants to department 2 but only a quarter of the applicants to +department 5 were admitted. It is the combination of these associations +which induces the spurious association between sex and admission in +Table 9.1. In essence, women had a lower admission rate +overall because relatively more of them applied to the more selective +departments and fewer to the easy ones.
Of all applicants | Department 2 | 3 | 4 | 5 | 6 |
---|---|---|---|---|---|
% Male | 96 | 35 | 53 | 33 | 52 |
% Admitted | 63 | 35 | 34 | 25 | 6 |
Number of applicants | 585 | 918 | 792 | 584 | 714 |
One possible set of causal connections leading to a spurious association between \(X\) and \(Y\) was represented graphically by Figure 8.10. There are, however, other possibilities which may be more appropriate in particular cases. In the admissions example, department (corresponding to the control variable \(Z\)) cannot be regarded as the cause of the sex of the applicant. Instead, we may consider the causal chain Sex \(\longrightarrow\) Department \(\longrightarrow\) Admission. Here department is an intervening variable between sex and admission rather than a common cause of them. We can still argue that sex has an effect on admission, but it is an indirect effect operating through the effect of sex on choice of department. The distinction is important for the original research question behind these data, that of possible sex bias in admissions. A direct effect of sex on likelihood of admission might be evidence of such bias, because it might indicate that departments are treating male and female candidates differently. An indirect effect of the kind found here does not suggest bias, because it results from the applicants’ own choices of which department to apply to.
+In the admissions example a strong association in the initial two-way +table was “explained away” when we controlled for a third variable. The +next example is one where controlling leaves the initial association +unchanged.
Example 9.2: Importance of short-term gains for investors (continued)
Table 2.7 showed a relatively strong association between a person’s age group and his or her attitude towards short-term gains as an investment goal. This association is also strongly significant, with \(P<0.001\) for the \(\chi^{2}\) test of independence. Table 9.4 shows the crosstabulations of these variables, now controlling also for the respondent’s sex. The association remains significant in both partial tables. An investigation of the row proportions suggests that the pattern of association is very similar in both tables, as is its strength as measured by the \(\gamma\) statistic (\(\gamma=-0.376\) among men and \(\gamma=-0.395\) among women). The conclusions obtained from the original two-way table are thus unchanged after controlling for sex.
MEN: Age group | Irrelevant | Slightly important | Important | Very important | Total |
---|---|---|---|---|---|
Under 45 | 29 | 35 | 30 | 22 | 116 |
| 0.250 | 0.302 | 0.259 | 0.190 | 1.000 |
45–54 | 83 | 60 | 52 | 29 | 224 |
| 0.371 | 0.268 | 0.232 | 0.129 | 1.000 |
55–64 | 116 | 40 | 28 | 16 | 200 |
| 0.580 | 0.200 | 0.140 | 0.080 | 1.000 |
65 and over | 150 | 53 | 16 | 12 | 231 |
| 0.649 | 0.229 | 0.069 | 0.052 | 1.000 |
Total | 378 | 188 | 126 | 79 | 771 |
| 0.490 | 0.244 | 0.163 | 0.102 | 1.000 |

WOMEN: Age group | Irrelevant | Slightly important | Important | Very important | Total |
---|---|---|---|---|---|
Under 45 | 8 | 10 | 8 | 4 | 30 |
| 0.267 | 0.333 | 0.267 | 0.133 | 1.000 |
45–54 | 28 | 17 | 5 | 8 | 58 |
| 0.483 | 0.293 | 0.086 | 0.138 | 1.000 |
55–64 | 37 | 9 | 3 | 4 | 53 |
| 0.698 | 0.170 | 0.057 | 0.075 | 1.000 |
65 and over | 43 | 11 | 3 | 3 | 60 |
| 0.717 | 0.183 | 0.050 | 0.050 | 1.000 |
Total | 116 | 47 | 19 | 19 | 201 |
| 0.577 | 0.234 | 0.095 | 0.095 | 1.000 |
Example 9.3: The Titanic
+The passenger liner RMS Titanic hit an iceberg and sank in the North +Atlantic on 14 April 1912, with heavy loss of life. Table +9.5 shows a crosstabulation of the people on board the +Titanic, classified according to their status (as male passenger, +female or child passenger, or member of the ship’s crew) and whether +they survived the sinking.56 The \(\chi^{2}\) test of independence has +\(P<0.001\) for this table, so there are statistically significant +differences in probabilities of survival between the groups. The table +suggests, in particular, that women and children among the passengers +were more likely to survive than male passengers or the ship’s crew.
Group | Survivor: Yes | No | Total |
---|---|---|---|
Male passenger | 146 | 659 | 805 |
| (0.181) | (0.819) | (1.000) |
Female or child passenger | 353 | 158 | 511 |
| (0.691) | (0.309) | (1.000) |
Crew member | 212 | 673 | 885 |
| (0.240) | (0.760) | (1.000) |
Total | 711 | 1490 | 2201 |
| (0.323) | (0.677) | (1.000) |
We next control also for the class in which a person was travelling, +classified as first, second or third class. Since class does not apply +to the ship’s crew, this analysis is limited to the passengers, +classified as men vs. women and children. The two-way table of sex by +survival status for them is given by Table 9.5, ignoring +the row for crew members. This association is strongly significant, with +\(\chi^{2}=344\) and \(P<0.001\).
Class | Group | Survivor: Yes | No | Total |
---|---|---|---|---|
First | Man | 57 | 118 | 175 |
| | 0.326 | 0.674 | 1.000 |
| Woman or child | 146 | 4 | 150 |
| | 0.973 | 0.027 | 1.000 |
| Total | 203 | 122 | 325 |
| | 0.625 | 0.375 | 1.000 |
Second | Man | 14 | 154 | 168 |
| | 0.083 | 0.917 | 1.000 |
| Woman or child | 104 | 13 | 117 |
| | 0.889 | 0.111 | 1.000 |
| Total | 118 | 167 | 285 |
| | 0.414 | 0.586 | 1.000 |
Third | Man | 75 | 387 | 462 |
| | 0.162 | 0.838 | 1.000 |
| Woman or child | 103 | 141 | 244 |
| | 0.422 | 0.578 | 1.000 |
| Total | 178 | 528 | 706 |
| | 0.252 | 0.748 | 1.000 |
Total | | 499 | 817 | 1316 |
| | 0.379 | 0.621 | 1.000 |

Table 9.6: Survival status of the passengers of the Titanic, classified by class and sex. The numbers below the frequencies are proportions within rows.
+Two-way tables involving class (not shown here) suggest that it is +mildly associated with sex (with percentages of men 54%, 59% and 65% in +first, second and third class respectively) and strongly associated with +survival (with 63%, 41% and 25% of the passengers surviving). It is thus +possible that class might influence the association between sex and +survival. This is investigated in Table 9.6, which shows +the partial associations between sex and survival status, controlling +for class. This association is strongly significant (with \(P<0.001\) for +the \(\chi^{2}\) test) in every partial table, so it is clearly not +explained away by associations involving class. The direction of the +association is also the same in each table, with women and children more +likely to survive than men among passengers of every class.
+The presence and direction of the association in the two-way Table +9.5 are thus preserved and similar in every partial table +controlling for class. However, there appear to be differences in the +strength of the association between the partial tables. Considering, +for example, the ratios of the proportions in each class, women and +children were about 3.0 (\(=0.973/0.326\)) times more likely to survive +than men in first class, while the ratio was about 10.7 in second class +and 2.6 in the third. The contrast of men vs. women and children was +thus strongest among second-class passengers. This example differs in +this respect from the previous ones, where the associations were similar +in each partial table, either because they were all essentially zero +(Example 9.1) or non-zero but similar in both direction and strength +(Example 9.2).
We are now considering three variables, class, sex and survival. Although it is not necessary for this analysis to divide them into explanatory and response variables, introducing such a distinction is useful for discussion of the results. Here it is most natural to treat survival as the response variable, and both class and sex as explanatory variables for survival. The associations in the partial tables in Table 9.6 are then partial associations between the response variable and one of the explanatory variables (sex), controlling for the other explanatory variable (class). As discussed above, the strength of this partial association is different for different values of class. This is an example of a statistical interaction. In general, there is an interaction between two explanatory variables if the strength and nature of the partial association between (either) one of them and a response variable depends on the value at which the other explanatory variable is controlled. Here there is an interaction between class and sex, because the association between sex and survival is different at different levels of class.
+Interactions are an important but challenging element of many +statistical analyses. Important, because they often correspond to +interesting and subtle features of associations in the data. +Challenging, because understanding and interpreting them involves +talking about (at least) three variables at once. This can seem rather +complicated, at least initially. It adds to the difficulty that +interactions can take many forms. In the Titanic example, for +instance, the nature of the class-sex interaction was that the +association between sex and survival was in the same direction but of +different strengths at different levels of class. In other cases +associations may disappear in some but not all of the partial tables, or +remain strong but in different directions in different ones. They may +even all or nearly all be in a different direction from the association +in the original two-way table, as in the next example.
Example 9.4: Smoking and mortality
+A health survey was carried out in Whickham near Newcastle upon Tyne in +1972–74, and a follow-up survey of the same respondents twenty years +later.57 Here we consider only the \(n=1314\) female respondents who +were classified by the first survey either as current smokers or as +never having smoked. Table 9.7 shows the +crossclassification of these women according to their smoking status in +1972–74 and whether they were still alive twenty years later. The +\(\chi^{2}\) test indicates a strongly significant association (with +\(P=0.003\)), and the numbers suggest that a smaller proportion of smokers +than of nonsmokers had died between the surveys. Should we thus conclude +that smoking helps to keep you alive? Probably not, given that it is +known beyond reasonable doubt that the causal relationship between +smoking and mortality is in the opposite direction. Clearly the picture +has been distorted by failure to control for some relevant further +variables. One such variable is the age of the respondents.
Smoker | Dead | Alive | Total |
---|---|---|---|
Yes | 139 | 443 | 582 |
| 0.239 | 0.761 | 1.000 |
No | 230 | 502 | 732 |
| 0.314 | 0.686 | 1.000 |
Total | 369 | 945 | 1314 |
| 0.281 | 0.719 | 1.000 |
Table 9.8 shows the partial tables of smoking vs. survival +controlling for age at the time of the first survey, classified into +seven categories. Note first that this three-way table appears somewhat +different from those shown in Tables 9.2, +9.4 and 9.6. This is because one variable, +survival status, is summarised only by the percentage of survivors +within each combination of age group and smoking status. This is a +common trick to save space in three-way tables involving dichotomous +variables like survival here. The full table can easily be constructed +from these numbers if needed. For example, 98.4% of the nonsmokers aged +18–24 were alive at the time of the second survey. Since there were a +total of 62 respondents in this group (as shown in the last column), +this means that 61 of them (i.e. 98.4%) were alive and 1 (or 1.6%) was +not.
+The percentages in Table 9.8 show that in five of the +seven age groups the proportion of survivors is higher among nonsmokers +than smokers, i.e. these partial associations in the sample are in the +opposite direction from the association in Table 9.7. This +reversal is known as Simpson’s paradox. The term is somewhat +misleading, as the finding is not really paradoxical in any logical +sense. Instead, it is again a consequence of a particular pattern of +associations between the control variable and the other two variables.
Age group | % Alive after 20 years: Smokers | Nonsmokers | Number (in 1972–74): Smokers | Nonsmokers |
---|---|---|---|---|
18–24 | 96.4 | 98.4 | 55 | 62 |
25–34 | 97.6 | 96.8 | 124 | 157 |
35–44 | 87.2 | 94.2 | 109 | 121 |
45–54 | 79.2 | 84.6 | 130 | 78 |
55–64 | 55.7 | 66.9 | 115 | 121 |
65–74 | 19.4 | 21.7 | 36 | 129 |
75– | 0.0 | 0.0 | 12 | 64 |
All age groups | 76.1 | 68.6 | 582 | 732 |
The two-way tables of age by survival and age by smoking are shown side +by side in Table 9.9. The table is somewhat elaborate and +unconventional, so it requires some explanation. The rows of the table +correspond to the age groups, identified by the second column, and the +frequencies of respondents in each age group are given in the last +column. The left-hand column shows the percentages of survivors within +each age group. The right-hand side of the table gives the two-way table +of age group and smoking status. It contains percentages calculated both +within the rows (without parentheses) and columns (in parentheses) of +the table. As an example, consider numbers for the age group 18–24. +There were 117 respondents in this age group at the time of the first +survey. Of them, 47% were then smokers and 53% were nonsmokers, and 97% +were still alive at the time of the second survey. Furthermore, 10% of +all the 582 smokers, 9% of all the 732 nonsmokers and 9% of the 1314 +respondents overall were in this age group.
% Alive | Age group | Row and column %: Smokers | Nonsmokers | Total % | Count |
---|---|---|---|---|---|
97 | 18–24 | 47 | 53 | 100 | 117 |
| | (10) | (9) | (9) | |
97 | 25–34 | 44 | 56 | 100 | 281 |
| | (21) | (21) | (21) | |
91 | 35–44 | 47 | 53 | 100 | 230 |
| | (19) | (17) | (18) | |
81 | 45–54 | 63 | 38 | 100 | 208 |
| | (22) | (11) | (16) | |
61 | 55–64 | 49 | 51 | 100 | 236 |
| | (20) | (17) | (13) | |
21 | 65–74 | 22 | 78 | 100 | 165 |
| | (6) | (18) | (13) | |
0 | 75– | 17 | 83 | 100 | 77 |
| | (2) | (9) | (6) | |
72 | Total % | 44 | 56 | 100 | |
| | (100) | (100) | (100) | |
945 | Total count | 582 | 732 | | 1314 |
Table 9.9 shows a clear association between age and +survival, for understandable reasons mostly unconnected with smoking. +The youngest respondents of the first survey were highly likely and the +oldest unlikely to be alive twenty years later. There is also an +association between age and smoking: in particular, the proportion of +smokers was lowest among the oldest respondents. The implications of +this are seen perhaps more clearly by considering the column +proportions, i.e. the age distributions of smokers and nonsmokers in the +original sample. These show that the group of nonsmokers was +substantially older than that of the smokers; for example, 27% of the +nonsmokers but only 8% of the smokers belonged to the two oldest age +groups. It is this imbalance which explains why nonsmokers, more of whom +are old, appear to have lower chances of survival until we control for +age.
The discussion above refers to the sample associations between smoking and survival in the partial tables, which suggest that mortality is higher among smokers than nonsmokers. In terms of statistical significance, however, the evidence is fairly weak: the association reaches even borderline significance only in the 35–44 and 55–64 age groups, with \(P\)-values of 0.063 and 0.075 respectively for the \(\chi^{2}\) test. This is an indication not so much of lack of a real association but of the fact that these data do not provide much power for detecting it. Overall twenty-year mortality is a fairly rough measure of health status for comparisons of smokers and nonsmokers, especially in the youngest and oldest age groups where mortality is either very low or very high for everyone. Differences are likely to have been further diluted by many of the original smokers having stopped smoking between the surveys. This example should thus not be regarded as a serious examination of the health effects of smoking, for which much more specific data and more careful analyses are required.58
+The Berkeley admissions data discussed earlier provide another example +of a partial Simpson’s paradox. Previously we considered only +departments 2–6, for none of which there was a significant partial +association between sex and admission. For department 1, the partial +table indicates a strongly significant difference in favour of women, +with 82% of the 108 female applicants admitted, compared to 62% of the +825 male applicants. However, the two-way association between sex and +admission for departments 1–6 combined remains strongly significant and +shows an even larger difference in favour of men than before. This +result can now be readily explained as a result of imbalances in sex +ratios and rates of admission between departments. Department 1 is both +easy to get into (with 64% admitted) and heavily favoured by men (88% of +the applicants). These features combine to contribute to higher +admissions percentages for men overall, even though within department 1 +itself women are more likely to be admitted.
+In summary, the examples discussed above demonstrate that many things +can happen to an association between two variables when we control for a +third one. The association may disappear, indicating that it was +spurious, or it may remain similar and unchanged in all of the partial +tables. It may also become different in different partial tables, +indicating an interaction. Which of these occurs depends on the patterns +of associations between the control variable and the other two +variables. The crucial point is that the two-way table alone cannot +reveal which of these cases we are dealing with, because the counts in +the two-way table could split into three-way tables in many different +ways. The only way to determine how controlling for other variables will +affect an association is to actually do so. This is the case not only +for multiway contingency tables, but for all methods of statistical +control, in particular multiple linear regression and other regression +models.
+Two final remarks round off our discussion of multiway contingency +tables:
+Extension of the ideas of three-way tables to four-way and larger +contingency tables is obvious and straightforward. In such tables, +every cell corresponds to the subjects with a particular combination +of the values of four or more variables. This involves no new +conceptual difficulties, and the only challenge is how to arrange +the table for convenient presentation. When the main interest is in +associations between a particular pair of two variables, the usual +solution is to present a set of partial two-way tables for them, one +for each combination of the other (control) variables. Suppose, for +instance, that in the university admissions example we had obtained +similar data at two different years, say 1973 and 2003. We would +then have four variables: year, department, sex and +admission status. These could be summarised as in Table +9.2, except that each partial table for sex +vs. admission would now be conditional on the values of both year +and department. The full four-way table would then consist of ten +\(2\times 2\) partial tables, one for each of the ten combinations of +two years and five departments, (i.e. applicants to Department 2 in +1973, Department 2 in 2003, and so on to Department 6 in 2003).
The only inferential tool for multiway contingency tables discussed here was to arrange the table as a set of two-way partial tables and to apply the \(\chi^{2}\) test of independence to each of them. This is a perfectly sensible approach and a great improvement over just analysing two-way tables. There are, however, questions which cannot easily be answered with this method. For example, when can we say that associations in different partial tables are different enough for us to declare that there is evidence of an interaction? Or what if we want to consider many different partial associations, either for a response variable with each of the other variables in turn, or because there is no single response variable? More powerful methods are required for such analyses. They are multiple regression models like the multiple linear regression of Section 8.5, but modified so that they become appropriate for categorical response variables. Some of these models are introduced on the course MY452.
These data, which were produced by the Graduate Division of UC +Berkeley, were first discussed in Bickel, P. J., Hammel, E. A., and +O’Connell, J. W. (1975), “Sex bias in graduate admissions: Data from +Berkeley”, Science 187, 398–404. They have since become a +much-used teaching example. The version of the data considered here +are from Freedman, D., Pisani, R., and Purves, R., Statistics +(W. W. Norton, 1978).↩
The data are from the 1912 report of the official British Wreck +Commissioner’s inquiry into the sinking, available at +http://www.titanicinquiry.org.↩
The two studies are reported in Tunbridge, W. M. G. et al. (1977). +“The spectrum of thyroid disease in a community: The Whickham +survey”. Clinical Endocrinology 7, 481–493, and Vanderpump, +M. P. J. et al. (1995). “The incidence of thyroid disorders in the +community: A twenty-year follow-up of the Whickham survey”. +Clinical Endocrinology 43, 55–69. The data are used to +illustrate Simpson’s paradox by Appleton, D. R. et al. (1996). +“Ignoring a covariate: An example of Simpson’s paradox”. The +American Statistician 50, 340–341.↩
For one remarkable example of such studies, see Doll, R. et +al. (2004), “Mortality in relation to smoking: 50 years’ +observations on male British doctors”, British Medical Journal +328, 1519–1528, and Doll, R. and Hill, A. B. (1954), “The +Mortality of doctors in relation to their smoking habits: A +preliminary report”, British Medical Journal 228, 1451–1455. +The older paper is reprinted together with the more recent one in +the 2004 issue of BMJ.↩
Some general instructions on computing and the SPSS package are given first below. It makes most sense to read these together with the instructions for individual computer classes, which begin in Section 7.3.
To access IT facilities at LSE you need an IT account with its Username and Password. Please see http://www.lse.ac.uk/intranet/LSEServices/IMT/guides/accounts/activateAccount.aspx for instructions on how to activate your account. In case of any problems, please ask for assistance at the IT help desk (Library 1st floor).

Various introductory documents can be accessed through the IMT services web pages at http://www.lse.ac.uk/intranet/LSEServices/IMT/home.aspx.
Logging in to use Windows: When you arrive at a networked computer, wait for Windows to start up (if the machine is not already on). Type in CTRL + ALT + Delete and the Enter Network Password screen will appear. Type in your username and your password and press Enter or click on the OK button. This will log you on to the computer.
The instructions for each class will give the name of a file or files which will be used for that exercise. In order to do the class, you will need to download the file to your H: space (i.e. your personal file storage space on the LSE network, shown as disk drive H: on a networked computer once you have logged on). You can download all the data files for the course, as well as other course-related material, from the web-based Moodle system. See instructions in Chapter 1 for how to register for MY451 on Moodle.
+SPSS (formerly Statistical Package for the Social Sciences) is a widely used general-purpose statistical software package. It will be used for all the computer classes on this course. The current version on the LSE network is SPSS 21. This section gives some general information on the structure and use of SPSS. The discussion is brief and not meant to be comprehensive. The instructions given here and in the descriptions of individual computer classes below will be sufficient for the purposes of this course. If, however, you wish to find out more about SPSS, more information and examples can be found in the SPSS help files and tutorials found under the Help menu of the program, and in introductory guide books such as
+Field, A. (2013). Discovering Statistics using IBM SPSS Statistics (4th ed). Sage. Kinnear, P. R. and Gray, C. D. (2012). SPSS 19 Made Simple. Psychology Press. Pallant, J. (2013). SPSS Survival Manual (5th ed). Open University Press.
+These are given here purely as examples (there are many others) and not as recommendations. We have not reviewed any of these books in detail and so cannot make any comparisons between them.
+To start SPSS, double-click on the SPSS icon on the Windows desktop. Alternatively, click on the Start button at the bottom left corner, and select All Programs, then Specialist and teaching software, Statistics, SPSS, and finally SPSS 21 (or some obvious variant of these, in case the exact wording on your desktop is slightly different).
+An initial screen for opening data files appears. Click on Cancel to get rid of this and to enter the data editor (which will be discussed further below).
+Select Exit from the File menu or click on the X at the upper right corner of the SPSS data editor window. You may then be prompted to save the information in the open windows; in particular, you should save the contents of the data editor in a file (see below) if you have made any changes to it.
+There are several different types of windows in SPSS. The two most important are
+Data editor: A data set is displayed in the Data Editor window. Several of these can be open at a time. The data editor which you have selected (clicked on) most recently defines the active data set, and the procedures you request from the menus are applied to this data set until you select a different active data set. The data editor window has two parts, accessed by clicking on the two tabs at the bottom of the window:
+Data view, which shows the data matrix in the spreadsheet-like form discussed in Section 2.2.1, with units in the rows and variables in the columns.
Variable view, which shows information about the variables.
Working with the data editor will be practised in the first computer class. The contents of the data editor, i.e. the data matrix and associated information, can be saved in an SPSS data file. Such files have names with the extension .sav.
Output viewer: Output from statistical analyses carried out on the data will appear here. The output can be printed directly from the viewer or copied and pasted to other programs. The contents of the viewer can also be saved in a file, with a name with the extension .spv (since version 17; in previous versions of SPSS the extension was .spo).
There are also other windows, for example for editing SPSS graphs. They will be discussed in the instructions to individual computer classes where necessary.
+Because analyses in SPSS are carried out by making choices from the menus, the instructions for the computer classes need to describe these choices somehow. To reduce the length and tedium of the instructions, we will throughout present them in a particular format explained below. Because this information is rather abstract if read in isolation, it is best to go through it while carrying out specific instructions for the first few computer classes.
+The appropriate menu choices for obtaining the dialog box for the required analysis are first given in bold, for example as follows:
+Analyze/Descriptive statistics/Frequencies
+This is short for “Click on the menu item Analyze at the top of the window; from the drop-down menu, select Descriptive statistics and then click on Frequencies.” This particular choice opens a dialog box for constructing various descriptive statistics and graphs (as discussed in Chapter 3).
+Unless otherwise mentioned, subsequent instructions then refer to choices in the most recently opened dialog box, without repeating the full path to it.
For all of the statistical analyses, we need first to specify which variables the analyses should be applied to. This is done by entering the names of those variables in appropriate boxes in the dialog boxes. For example, the dialog box opened above has a box labelled Variable(s) for this purpose. The dialog box also includes a separate box containing a list of all the variables in the data set. The required variables are selected from this list and moved to the choice boxes (and back again, when choices are changed) by clicking on an arrow button between the boxes. For example, suppose that a data set contains a grouped age variable called AGEGROUP, for which we want to construct a frequency table. The class instructions may then state in words “Place AGEGROUP in the Variable(s) box”, or sometimes just
+Variable(s)/AGEGROUP
+both of which are short for “In the dialog box opened above, click on the name AGEGROUP in the list of variables, and then click on the arrow button to move the name into the Variable(s) box”. Sometimes we may also use a generic instruction of the form
Variable(s)/\(<\)Variables\(>\)
+where \(<\)Variables\(>\) indicates that this is where we would put the name of any variables for which we want to obtain a frequency table. Note that here and in many other procedures, it is possible to select several variables at once. For the Frequencies procedure used as an example here, this simply means that a separate frequency table is constructed for each selected variable.
Other choices in a dialog box determine details of the analysis and its output. In most cases the selection is made from a fixed list of possibilities provided by SPSS, by clicking on the appropriate box or button. In the instructions, the choice is indicated by listing a path to it, for example as
Charts/Chart Type/Bar charts
+in the above example (this requests the so-called bar chart). The items on such a list are labels for various items in the dialog boxes. For example, here Charts is a button which opens a new subsidiary dialog box, Chart Type is the title of a list of options in this new dialog box, and Bar charts is the choice we want to select. In other words, the above instruction is short for “In the dialog box opened above, click on the button Charts to open a new dialog box. Under Chart type, select Bar charts by clicking on a button next to it.”
Some choices need to be made by typing in some information rather than selecting from a list of options. Specific instructions for this will be given when needed.
After choices are made in subsidiary dialog boxes, we return to the main dialog box by clicking on Continue. Once all the required choices have been made, the analysis is executed by clicking on OK in the main dialog box. This should be reasonably obvious, so we will omit explicit instructions to do so.
A useful feature of SPSS is the dialog recall button, which is typically sixth from the left in the top row of buttons in the Output viewer window; the button shows a rectangle with a green arrow pointing down from it. Clicking on this gives a menu of recently used procedures, and choosing one of these brings up the relevant dialog box, with the previously used choices selected. This is useful when you want to rerun a procedure, e.g. to try different choices for its options. It is usually quicker to reopen a dialog box using the dialog recall button than through the menus.
+Various options for controlling the format of SPSS output and other features can be found under Edit/Options. For example, an often useful choice is General/Variable Lists/Display names. This instructs SPSS to display the names of variables in the variable lists of all procedures, instead of the (typically much longer) descriptive labels of the variables. In large data sets this may make it easier to find the right variables from the list. This may be further helped by selecting General/Variable Lists/Alphabetical, which causes the names to be listed in an alphabetical order rather than the order in which the variables are included in the data set.
All the computers in the public rooms are connected to one of the laser printers. When you print a document or a part of it, you need to have credit on your printing account. See the IMT web pages for further information about printing and printing credit.
+You can print your results from the Output Viewer either by selecting File/Print or by clicking on Print on the toolbar (the button with a little picture of a printer). Please note that SPSS output is often quite long, so this may result in much more printout than you really want.
Alternatively, in the Output Viewer, select the objects to be printed, select Edit / Copy, open a Word or Excel document and Paste. You can make any changes or corrections in this document before printing it. This method gives you more control over what gets printed than printing directly from SPSS.
At the printer terminal, type in your username and password. The files sent for printing are then listed. Select the appropriate file number and follow the instructions given by the computer.
Early versions of SPSS had no menu-based interface. Instead, commands were executed by specifying them in SPSS command language. This language is still there, underlying the menus, and each choice of commands and options from the menus can also be specified in the control language. We will not use this approach on this course, so you can ignore this section if you wish. However, there are some very good reasons why you might want to learn about the control language if you need to work with SPSS for, say, analyses for your thesis or dissertation:
+Because the control language commands can be saved in a file, they preserve a record of how an analysis was done. This may be important for checking that there were no errors, and for rerunning the analyses later if needed.
For repetitive analyses, modifying and rerunning commands in the control language is quicker and less tedious than using the menus repeatedly.
Some advanced SPSS procedures are not included in the menus, and can only be accessed through the control language.
The main cost of using the control language is learning its syntax. This is initially much harder than using the menus, but becomes easier with experience. The easiest way to begin learning the syntax is to request SPSS to print out the commands corresponding to choices made from the menus. Two easy ways of doing so are
+Selecting the session option (i.e. under Edit/Options) Viewer/Display commands in the log. This causes the commands corresponding to the menu choices to be displayed in the output window.
Clicking on the Paste button in a dialog box (instead of OK) after selecting an analysis. This opens a Syntax window where the corresponding commands are now displayed. The commands in a syntax window can be edited and executed, and also saved in a file (with the extension .sps) for future use.
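For illustration, pasting the Frequencies example used earlier in this section would produce syntax along roughly the following lines (a sketch only; the exact subcommands depend on the options chosen and the SPSS version):

* Frequency table and bar chart for the grouped age variable AGEGROUP.
FREQUENCIES VARIABLES=AGEGROUP
  /BARCHART FREQ
  /ORDER=ANALYSIS.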
The data file ESS5_sample.sav will be used today. It contains a simplified sample of data from UK respondents in the 2010 European Social Survey (Round 5). The questions in the survey that you see here were designed by Dr Jonathan Jackson and his team as part of a module investigating public trust in the criminal justice system. Further information about the study can be found at
+www.lse.ac.uk/methodology/whosWho/Jackson/jackson_ESS.aspx
The main purpose of today’s class is to introduce you to the layout of SPSS and to show you how to produce some basic tables and graphs for categorical variables. Additionally, we provide instructions on how to enter data into a new SPSS data file, using the Data Editor. This exercise is not strictly needed for the course, but we include it for two purposes. Firstly, students often find this a helpful way of learning how the software works. Secondly, this exercise may be a useful introduction for students who go on to collect or collate data for their own empirical research.
+Opening an SPSS data file: this is done from File/Open/Data, selecting the required file from whichever folder it is saved in in the usual Windows way. Do this to open ESS5_sample.sav.
Information in the Variable View window. The data file is now displayed in the Data Editor. Its Data View window shows the data as a spreadsheet (i.e. a data matrix). We will first consider the information in the Variable View window, accessed by clicking on the Variable View tab at the bottom left corner of the window. The columns of this window show various pieces of information about the variables. Take a little while familiarising yourself with them. The most important of the columns in Variable View are
+Name of the variable in the SPSS data file. The names in this column (also shown as the column headings in Data View) will be used to refer to specific variables in all of the instructions for these computer classes.
Type of the variable. Here most of the variables are Numeric, i.e. numbers, and a few are String, which means text. Clicking on the entry for a variable in this column and then on the button (with three dots on it) revealed by this shows a list of other possibilities.
Width and Decimals control the total number of digits and the number of decimal places displayed in Data View. Clicking on an entry in these columns reveals buttons which can be used to increase or decrease these values. Here all but two of the numeric variables are coded as whole numbers, so Decimals has been set to 0 for them.
Label is used to enter a longer description of the variable. Double-clicking on an entry allows you to edit the text.
Values shows labels for individual values of a variable. This is mostly relevant for categorical variables, such as most of the ones in these data. Such variables are coded in the data set as numbers, and the Values entry maintains a record of the meanings of the categories the numbers correspond to. You can see examples of this by clicking on some of the entries in the Values column and then on the resulting button. The value labels can also be displayed for each observation in Data View by selecting View/Value Labels in that window.
Missing specifies missing data codes, i.e. values which are not actual measurements but indicators that an observation should be treated as missing. There may be several such codes. For example, variables in these data often have separate missing data codes for cases where a respondent was never asked a question (“Not applicable”, often abbreviated NAP), replied “Don’t know” (DK) or otherwise failed to provide an answer (“Refusal” or “No answer”; NA); the explanations of these values are found in the Values column. An alternative to using missing data codes (so-called User missing values) is to enter no value (a System missing value) for an observation in the data matrix. This is displayed as a full stop (.) in Data View. There are no such values in these data.
Measure indicates the measurement level of a variable, as Nominal, Ordinal or Scale (meaning interval). This is mostly for the user’s information, as SPSS makes little use of this specification.
Any changes made to the data file are preserved by saving it again from File/Save (or by clicking on the Save File button of the toolbar, which is the one with the picture of a diskette). You will also be prompted to do so when exiting SPSS or when trying to open a new data file. Today you should not save any changes you may have made to ESS5_sample.sav, so click No if prompted to do so below.
Most of the statistics required for this class are found in SPSS under Analyze/Descriptive Statistics/Frequencies as follows:
Names of the variables for which the statistics are requested are placed in the Variable(s) box. To make it easy to find variables in the list box on the left, you may find it convenient to change the way the variables are displayed in the list; see under “SPSS Session Options” in Section 7.2 for instructions.
Tables of frequencies: select Display frequency tables
Bar charts: Charts/Chart Type/Bar charts. Note that under Chart Values you can choose between frequencies or percentage labels on the vertical axis.
Pie charts: Charts/Chart Type/Pie charts
In addition, we will construct some two-way tables or cross-tabulations, by selecting Analyze/Descriptive Statistics/Crosstabs. In the dialog box that opens, request a contingency table between two variables by entering
+The name of the row variable into the Row(s) box, and
The name of the column variable into the Column(s) box.
Cells/Percentages for percentages within the table: Row gives percentages within each row (i.e. frequencies divided by row totals), Column percentages within columns, and Total percentages out of the total sample size.
The labels in the SPSS output should be self-explanatory. Note that in this and all subsequent classes, the output may also include some entries corresponding to methods and statistics not discussed on this course. They can be ignored here.
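If you want to see the equivalent control-language commands (for example via the Paste button mentioned earlier), the menu choices above correspond roughly to the following syntax sketch, using the variable names from today’s exercises:

* Frequency table and bar chart for GOODJOB.
FREQUENCIES VARIABLES=GOODJOB
  /BARCHART FREQ.
* Two-way table of GOODJOB by AGE_GRP with counts and row percentages.
CROSSTABS
  /TABLES=AGE_GRP BY GOODJOB
  /CELLS=COUNT ROW.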
+The first variable in the data set, GOODJOB, asks respondents whether they generally feel that the police are doing a good job in their country. There are three response categories for this item: “a good job”, “neither a good job nor a bad job”, or “a bad job”. Obtain a frequency table and bar chart to investigate the distribution of responses to this question.
+Check that you understand how to interpret the output you obtain. In particular, make sure that you understand the information displayed in each of the columns in the main table, and that you can see the connection between the information in the table and the information represented in the bar chart.
The last variable in the set, AGE_GRP, records in which of the following age groups each respondent falls: up to 29 years of age, 30-49, or 50+ years. Let us consider the association between age group and opinions of the police. Obtain a two-way contingency table of GOODJOB by AGE_GRP. To make interpretation easier, request percentages within each of the age groups. If you use AGE_GRP as the row variable, then include row percentages in the output.
+Interpret the resulting table. Are opinions of the police distributed differently among the three different age groups? Does there appear to be an association between age group and attitude?
If you have time after completing the data entry exercise (below), you may wish to return to this data set and explore frequencies and contingency tables for some of the other variables in the set.
This procedure may be useful to know if the data you are analysing are not in any electronic form at the beginning of the analysis, for example if you start with a pile of filled-in questionnaires from a survey. For practice, we will enter the following small, artificial data set:
Sex: Man; Age: 45; Weight: 11 st 3 lbs
Sex: Woman; Age: 68; Weight: 8 st 2 lbs
Sex: Woman; Age: 17; Weight: 9 st 6 lbs
Sex: Man; Age: 28; Weight: 13 st 8 lbs
Sex: Woman; Age: 16; Weight: 7 st 8 lbs
Select File/New/Data to clear the Data Editor. Go to Variable View and enter into the first four rows of the Name column names for the variables, for example sex, age, wstones and wpounds.
Switch to Data View and type the data above into the appropriate columns, one unit (respondent) per row. Note that the person’s weight is entered into two columns, one for stones and one for pounds. Enter sex using numerical codes, e.g. 1 for women and 2 for men.
Save the file as a new SPSS data file (File/Save as), giving it a name of your choice. You should also resave the file (from File/Save or by clicking the File Save button) after each of the changes made below.
Practise modifying the information in Variable View by adding the following information for the sex variable:
+Enter the label Sex of the respondent into the Label column.
Click on the Values cell and then on the resulting button to open a dialog box for entering value labels. Enter Value: 1; Value Label: Woman; Add. Repeat for men, and click OK when finished.
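The same labelling can also be done with syntax commands rather than through Variable View. A minimal sketch, assuming the variable is called sex and coded as above:

* Attach a descriptive label and value labels to the variable sex.
VARIABLE LABELS sex 'Sex of the respondent'.
VALUE LABELS sex 1 'Woman' 2 'Man'.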
Transforming variables: It is often necessary to derive new variables from existing ones. We will practise the two most common examples of this:
+Creating a grouped variable: Suppose, for example, that we want to define a grouped age variable with three categories: less than 25 years, 25–54 and 55 or over. This is done as follows:
+Select Transform/Recode into Different Variables. This opens a dialog box which is used to define the rule for how values of the existing variable are to be grouped into categories of the new one.
Move the name of the age variable to the Input Variable \(\rightarrow\) Output Variable box.
Under Output Variable, enter the Name of the new variable, for example agegroup, and click Change.
Click on Old and New Values. Enter Old Value/Range: Lowest through 24 and New Value/Value: 1, and click Add.
Repeat for the other two categories, selecting Range: 25 through 54 and Range: 55 through highest for Old value, and 2 and 3 respectively for New value.
You should now see the correct grouping instructions in the Old \(\rightarrow\) New box. Click Continue and OK to create the new variable.
Check the new variable in Data View. At this stage you should normally enter in Variable View the value labels of the age groups.
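The equivalent Recode command in SPSS syntax would look roughly like this (a sketch, assuming the variable names age and agegroup used above):

* Group age into three categories in the new variable agegroup.
RECODE age (LOWEST THRU 24=1) (25 THRU 54=2) (55 THRU HIGHEST=3) INTO agegroup.
EXECUTE.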
Calculations on variables: Some new variables are obtained through mathematical calculations on existing ones. For example, suppose we want to include weight in kilograms as well as stones and pounds. Using the information that one stone is equal to 6.35 kg and one pound is about 0.45 kg, the transformation is carried out as follows:
+Select Transform/Compute Variable. This opens a dialog box which is used to define the rule for calculating the values of the new variable.
Enter Target variable: weightkg (for example; this is the name of the new variable) and Numeric Expression: 6.35 * wstones + 0.45 * wpounds; for the latter, you can either type in the formula or use the variable list and calculator buttons in a fairly obvious way. Click OK to create the new variable.
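In syntax, the same calculation could be written roughly as follows (using the variable names suggested above):

* Convert weight from stones and pounds to kilograms.
COMPUTE weightkg = 6.35*wstones + 0.45*wpounds.
EXECUTE.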
The homework exercise for this week is to complete the multiple choice quiz which you can find in the Moodle resource for MY451. Answers to the questions are also included there, including feedback on why the incorrect answers are incorrect. The first part of the quiz asks for answers to the class exercise, and the second part asks you to identify the level of measurement of some different variables.
Data set: The data file used today is london-borough-profiles.sav. It contains a selection of data on the 33 London boroughs obtained from the London Datastore, which publishes a range of statistical data about the city, collated by the Greater London Authority’s GLA Intelligence Unit.17
+This week you will produce and examine descriptive statistics for a number of individual variables. As for last week, almost all of the statistics required for this class can be obtained in SPSS under Analyze/Descriptive Statistics/Frequencies. Note that you will probably not find the tables of frequencies very useful, because continuous variables can take so many different values. So for this class, uncheck the Display frequency tables option in the dialog box.
Measures of central tendency: Mean, Median and Mode under Statistics/Central Tendency
Measures of variation: Range, Std. deviation and Variance under Statistics/Dispersion. For the Interquartile range, select Statistics/ Percentile values/Quartiles and calculate by hand the difference between the third and first quartiles given (as Percentiles 75 and 25 respectively) in the output.
Histograms: Charts/Chart Type/Histograms
Two charts needed today are not found under the Frequencies procedure:
+Stem and leaf plots, which are obtained from Analyze/Descriptive Statistics/Explore by entering variable(s) under Dependent list and selecting Display/Plots and Plots/Descriptive/Stem-and-leaf. You can place more than one variable under the Dependent list in order to compare variables.
Box plots are also automatically generated through this dialog box, regardless of whether you want to see them! So this is the simplest way to produce them.
Most of these statistics and charts can be obtained in other ways as well, for example from Analyze/ Descriptive Statistics/Descriptives or Graphs/Legacy Dialogs/Histogram, or Graphs/Legacy Dialogs/Boxplot, but we will not use these alternatives today. Feel free to investigate them in your own time if you wish.
+The labels in the SPSS output should be self-explanatory. Note that in this and all subsequent classes, the output may also include some entries corresponding to methods and statistics not discussed on this course. They can be ignored here.
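For reference, the menu choices described above correspond roughly to the following syntax sketch (using YOUTH_DEPRIVATION from the exercises below as an example variable):

* Summary statistics, quartiles and a histogram, with the frequency table suppressed.
FREQUENCIES VARIABLES=YOUTH_DEPRIVATION
  /FORMAT=NOTABLE
  /STATISTICS=MEAN MEDIAN MODE STDDEV VARIANCE RANGE MINIMUM MAXIMUM
  /PERCENTILES=25 50 75
  /HISTOGRAM.
* Stem and leaf plot and box plot from the Explore procedure.
EXAMINE VARIABLES=YOUTH_DEPRIVATION
  /PLOT=BOXPLOT STEMLEAF.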
The variable YOUTH_DEPRIVATION records for each borough the percentage of children who live in out-of-work families. This is an indicator of deprivation, with higher values indicating a worse situation for each borough. Investigate the distribution of this variable across London boroughs by obtaining its mean, median, minimum and maximum, quartiles and standard deviation, and a histogram. Obtain also a stem and leaf plot and a box plot. Note that double-clicking on a histogram (or any other SPSS graph) opens it in a new window, where the graph can be further edited by changing titles, colours etc. The graph can also be exported from SPSS into other software. Check that you understand how to find the measures of central tendency and dispersion from the output. Does the distribution of YOUTH_DEPRIVATION appear to be symmetric or skewed?
Consider now the variable CRIME, which records the number of reported crimes for every 1000 inhabitants, over the years 2011-12. Obtain some summary descriptive statistics, a histogram and a box plot for this variable. Is the distribution of the variable symmetric or skewed to the left or right? CRIME is one of many variables in this data set which have outliers, i.e. boroughs with unusually large or small values of the variable. Normally statistical analysis focuses on the whole data rather than individual observations, but the identities of individual outliers are often also of interest. The outliers can be seen most easily in the box plots, where SPSS labels them with their case numbers, so that you can identify them easily in the data set. For example, 1 would indicate the 1st case in the data set. If you click on the Data View tab you can see that this 1st case is the City of London. Which borough is the outlier for CRIME?
For the questions below, select the relevant SPSS output to include in your homework and write brief answers to the specific questions. Remember SPSS produces some outputs that you do not need. Feel free to transcribe tables or modify charts if you wish to improve their presentation.
The variable VOTING records voter turnout in a borough, specifically the percentage of eligible voters who voted in the local elections in 2010. Obtain descriptive statistics, a histogram and a box plot for this variable. What is the range of the variable, and what is its inter-quartile range? Are there any outliers? Is the distribution of voter turnout symmetrical or skewed? How can you tell?
In the data set employment rates are given overall, but also separately for males and females. The employment rate is the percentage of working age population who are in employment. Compare and contrast male and female employment rates across the boroughs, using the variables MALE_EMPLOYMENT and FEMALE_EMPLOYMENT. Comment on the differences and/or similarities in their descriptive statistics: minimum and maximum, mean, median and standard deviation. Obtain histograms for these two variables. Are the distributions of male employment and female employment symmetrical or skewed?
Data set: The data file used today is GSS2010.sav. It contains a selection of variables on attitudes and demographic characteristics for 2044 respondents in the 2010 U.S. General Social Survey (GSS)18. The full data set contains 790 variables. For convenience the version you are analysing today contains just a selection of those items.
+All of the analyses needed for this week’s class are found under Analyze/Descriptive Statistics/Crosstabs. We will be obtaining contingency tables between two variables, as in Week 2 class, with the following commands:
+The name of the row variable into the Row(s) box, and
The name of the column variable into the Column(s) box.
Cells/Percentages for percentages within the table: Row gives percentages within each row (i.e. frequencies divided by row totals), Column percentages within columns, and Total percentages out of the total sample size.
The only additional output we will need today is obtained by selecting
Statistics/Chi-square for the \(\chi^{2}\) test of independence
(If you are interested in the \(\gamma\) measure of association for ordinal variables, outlined in the coursepack, you may obtain it using Statistics/Ordinal/Gamma. In the output the \(\gamma\) statistic is shown in the “Symmetric measures” table in the “Value” column for “Gamma”. We will not use this measure today, but feel free to ask if you are interested in it.)
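In syntax, the table and test described above can be sketched as follows (using SEX and FEFAM from the exercises below as example variables):

* Contingency table with row percentages and the chi-square test of independence.
CROSSTABS
  /TABLES=SEX BY FEFAM
  /CELLS=COUNT ROW
  /STATISTICS=CHISQ.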
Suppose we want to use the GSS data to investigate whether in the U.S. population sex and age are associated with attitudes towards women’s roles. The respondent’s sex is included in the data as the variable SEX, and age as AGEGROUP in three groups: 18-34, 35-54, and 55 or over. The three attitude variables we consider are
+FEFAM: Level of agreement with the following statement: “It is much better for everyone involved if the man is the achiever outside the home and the woman takes care of the home and family”. Available response options are Strongly agree, Agree, Disagree, and Strongly disagree.
FEPOL: Level of agreement with the following statement: “Most men are better suited emotionally for politics than are most women”. Available response options are: Agree and Disagree.
FEPRES: Response to the following statement: “If your party nominated a woman for President, would you vote for her if she were qualified for the job?” Available response options are Yes and No.
Consider first the association between sex and attitude towards male and female work roles, by constructing a contingency table between SEX and FEFAM. To make interpretation of the results easier, include also appropriate percentages. Here it makes most sense to treat sex as an explanatory variable for attitude, so we want to examine percentages of attitudes within categories of male and female. If you use SEX as the row variable, this means including the Row percentages in the output. Request also the \(\chi^{2}\)-test statistic. In SPSS output, results for the \(\chi^{2}\) test are given below the two-way table itself in a table labelled “Chi-Square Tests”, in the row “Pearson Chi-Square”. The test statistic itself is given under “Value” and its \(P\)-value under “Asymp. Sig. (2-sided)”. By considering the \(\chi^{2}\) test statistic and its \(P\)-value, do you think there is enough evidence to conclude that males and females differ in their views on male and female work roles? If there is, how would you describe the association?
Consider now the association between age and attitude towards male and female work roles, by constructing a table between AGEGROUP and FEFAM. Interpret the results, and compare them to your findings in Exercise 1.
Examine differences between men and women in their views about women’s suitability for politics, using a table between SEX and FEPOL. Interpret the results. (Note: ignore the last two columns of the \(\chi^{2}\) test output, labelled “Exact Sig. (2-sided)” and “Exact Sig. (1-sided)”, and use the result under “Asymp. Sig. (2-sided)” as in the other tables.)
What is the null hypothesis for the \(\chi^{2}\) test that you carried out in analysis 2 in the class, for the table of AGEGROUP by FEFAM?
State the \(\chi^{2}\) test statistic, degrees of freedom and \(P\)-value for this table, and interpret these results.
Interpret the table of percentages to describe the nature of the association between AGEGROUP and FEFAM.
Consider now the association between age and attitude towards voting for a female President, by constructing a table between AGEGROUP and FEPRES. In the population, do people in different age groups differ in their willingness to vote for a female President? Interpret the results of the \(\chi^{2}\) test and illustrate your answer with one or two percentages from the two-way table.
Data set: The data file used today is ESS5_GBFR.sav. It contains data for a selection of variables from the 2010 European Social Survey for respondents in Great Britain and France19. Only a few of the variables are used in the exercises; the rest are included in the data set as examples of the kinds of information obtained from this survey.
Two-sample inference for means in SPSS
+\(t\)-tests and confidence intervals for two independent samples for inference on the difference of the population means: Analyze/Compare Means/Independent-Samples T Test. The variable of interest \(Y\) is placed under Test Variable(s) and the explanatory variable \(X\) under Grouping Variable. The values of \(X\) identifying the two groups being compared are defined under Define Groups.
Box plots for descriptive purposes are obtained from Analyze/Descriptive Statistics/Explore. Here we want to draw side-by-side box plots for values of a response variable \(Y\), one plot for each distinct value of an explanatory variable \(X\). The name of \(Y\) is placed under Dependent List and that of \(X\) under Factor List. Box plots are obtained by selecting Plots/Boxplots/Factor levels together.
Tests and confidence intervals for single means (c.f. Section ??) are not considered today. These are obtained from Analyze/Compare Means/One-Sample T Test. They can also be used to carry out inference for comparisons of means between two dependent samples (c.f. Section ??).
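A rough syntax sketch of the two procedures used today, with the variables CNTRY and WKHTOT from the exercises below:

* Side-by-side box plots and descriptive statistics of WKHTOT for each country.
EXAMINE VARIABLES=WKHTOT BY CNTRY
  /PLOT=BOXPLOT
  /STATISTICS=DESCRIPTIVES.
* Two-sample t-test and confidence interval comparing the two countries.
T-TEST GROUPS=CNTRY('GB' 'FR')
  /VARIABLES=WKHTOT.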
Classwork
+Consider the survey data in the file ESS5_GBFR.sav. We will examine two variables, and carry out statistical inference to compare their means among the survey populations of adults in Great Britain and France20.
The variable WKHTOT shows the number of hours per week the respondent normally works in his or her main job. Obtain box plots and descriptive statistics for this variable separately for each country (identified by the variable CNTRY). Compare measures of central tendency and variation for WKHTOT between the two countries. What do you observe?
Obtain a \(t\)-test and confidence interval for the difference of weekly working hours between Britain and France (specify the values of the country variable as Define Groups/Group 1: GB and Group 2: FR as coded in the data). Details of SPSS output for this are explained in Chapter ??; you can use the results under the assumption of equal population variances. What do you conclude? Is there a statistically significant difference in the average values of WKHTOT between the two countries? What does the confidence interval suggest about the size of the difference?
The variable STFJBOT asks those in paid work, “How satisfied are you with the balance between the time you spend on your paid work and the time you spend on other aspects of your life?”. Respondents are asked to rate their level of satisfaction on a scale from 0-10, where 0 means “Extremely dissatisfied” and 10 means “Extremely satisfied”. Repeat exercises 1 and 2 for this variable, and compare also histograms of STFJBOT for each country. What do you observe?
HOMEWORK
+Write up your answers to the second class exercise, answering these specific questions:
+What are the observed sample means for WKHTOT for French and British respondents?
Is there a statistically significant difference in the average values of WKHTOT between the two countries? State the value of the test statistic and its corresponding \(P\)-value. You may assume equal population variances for this test.
Interpret the 95% confidence interval for the difference.
The variable WKHSCH asks respondents, “How many hours a week, if any, would you choose to work, bearing in mind that your earnings would go up or down according to how many hours you work?”. Is there a statistically significant difference between ideal (rather than actual) work hours for French and British respondents? Carry out a t-test and report and interpret the results.
The variable STFMJOB asks respondents, “How satisfied are you in your main job?”. Respondents are asked to rate their level of satisfaction on a scale from 0-10, where 0 means “Extremely dissatisfied” and 10 means “Extremely satisfied”. Is there a statistically significant difference, at the 5% level of significance, between mean levels of job satisfaction for French and British respondents? Answer this question by using the 95% confidence interval for the difference in means (you need the full t-test output to obtain the confidence interval, but you need not report the results of the t-test itself for this question).
Data sets: Files BES2010post_lastdebate.sav and BES2010pre_lastdebate.sav.
Inference on proportions in SPSS
+SPSS menus do not provide procedures for calculating the tests and confidence intervals for proportions discussed in Chapter ??. This is not a serious limitation, as the calculations are quite simple.
It is probably easiest to use a pocket calculator for the calculations, and this is the approach we recommend for this class. The only part of the analysis it cannot do is calculating the precise \(P\)-value for the tests, but even this can be avoided by using critical values from a statistical table such as the one at the end of this Coursepack to determine approximate \(P\)-values (or by using an online \(P\)-value calculator — see “About Week 4 class” on the Moodle page for suggested links).
The survey data set BES2010post_lastdebate.sav contains part of the information collected by the British Election Study, an ongoing research programme designed to understand voter choices in the UK.21
+In the run-up to the UK General Election on 6 May 2010, opinion polls reported quite dramatic changes in popularity of the Liberal Democrat party. Key to their increasing popularity was the performance of their party leader, Nick Clegg, in a series of three televised debates between the leaders of the three main political parties (the other participants were Gordon Brown for Labour and David Cameron for the Conservative party). The debates were broadcast between 15 and 29 April 2010.
+The data in BES2010post_lastdebate.sav contain information on respondents’ voting intentions, obtained after the debates had ended (i.e. between 30 April and 6 May).
+VOTE_LIBDEM is a dichotomous variable indicating whether a respondent intended to vote for the Liberal Democrats (value 1) or some other party (0) in the 2010 General Election. The value of this variable is by definition missing for those who had not decided which way they would vote or who did not intend to vote at all, so they are automatically excluded from the analysis. The parameter of interest \(\pi\) is now the population proportion of those who say they would vote Liberal Democrat. We will compare it to 0.23, the proportion of the vote the party actually received in 2010. The analysis is thus one-sample inference on a population proportion, and the relevant formulas are (??) for the test statistic and (??) for the confidence interval.
+Begin by creating a frequency table of VOTE_LIBDEM. This should show that the sample estimate of \(\pi\) is 0.260, out of \(3226\) non-missing responses. Thus \(n=3226\) and \(\hat{\pi}=0.260\) in the notation of Chapter ??.
For the one-sample significance test, the value of \(\pi\) under the null hypothesis is \(\pi_{0}=0.230\). Using equation (??), the value of the test statistic \(z\) is thus given by the calculation \[z = \frac{0.260-0.230}{\sqrt{0.230\times (1-0.230)/3226}}\] Calculate this using a calculator. The result should be \(z=4.049\).
The (two-sided) \(P\)-value for this is the probability that a value from the standard normal distribution is at most \(-4.049\) or at least 4.049. Evaluate this approximately by comparing the value of \(z\) to critical values from the standard normal distribution (c.f. Table ??) as explained in Section ??. Here, for example, \(z\) is larger than 1.96, so the two-sided \(P\)-value must be smaller than 0.05. Convince yourself that you understand this statement.
Calculate a 95% confidence interval for the population proportion of prospective Liberal Democrat voters, using equation (??).
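As a check on your working, and assuming the usual large-sample interval \(\hat{\pi}\pm 1.96\sqrt{\hat{\pi}(1-\hat{\pi})/n}\) (confirm that this matches the formula referred to above), the calculation is approximately \[0.260 \pm 1.96\sqrt{\frac{0.260\times(1-0.260)}{3226}}=0.260\pm 0.015,\] i.e. an interval from about 0.245 to 0.275.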
What do you conclude about the proportions of prospective and actual Liberal Democrat voters? Why might the two differ from each other?
The variable TVDEBATE indicates whether the respondent reports having watched any of the three televised debates (1 for Yes, at least one watched, 0 otherwise - this includes “no” and “don’t know” responses). We will compare the proportion of people intending to vote Liberal Democrat amongst those who watched some or all of the debates with those who did not, using the two-sample methods of analysis discussed in Section 4.2. The formula of the \(z\)-test statistic for testing the hypothesis of equal population proportions is thus (??), and a confidence interval for the difference of the proportions is (??).
+Begin by calculating the relevant sample proportions. The easiest way to do this is by creating a two-way contingency table between TVDEBATE and VOTE_LIBDEM as you did in the Week 2 and 4 classes. The results required for the analysis considered here are all shown in the resulting table. Convince yourself that these show that, in the notation of Section 4.2,
+\(n_{1}=930\) and \(\hat{\pi}_{1}=0.218\; (=203/930)\),
\(n_{2}=2296\) and \(\hat{\pi}_{2}=0.277\; (=636/2296)\),
where 1 denotes respondents who did not watch any of the debates and 2 those who watched at least some. The pooled estimated proportion \(\hat{\pi}\) (formula ??) used in the test statistic (??) is here \(\hat{\pi}=0.260\), shown on the “Total” row.
Calculate the test statistic, its \(P\)-value and a 95% confidence interval for the difference in population proportions, using the relevant formulas. For example, the test statistic is here given by \[z= \frac{0.277-0.218}{\sqrt{0.260\times (1-0.260)\times (1/2296+1/930)}}.\]
What do you conclude? Is there evidence that those who watched at least some of the leaders’ debates were more likely to declare an intention to vote Liberal Democrat? If there is, how big is the difference in proportions of prospective Liberal Democrat voters between the debate-watchers and debate-non-watchers?
Write up your answers to the second class exercise. In particular, answer the following specific questions:
+What proportion of respondents say that they did watch at least some of the leaders’ debates? And what proportion did not? Of those who watched at least some of the leaders’ debates, what proportion said they intended to vote Liberal Democrat? And what proportion of those who did not watch any of the leaders’ debates said they intended to vote Liberal Democrat?
Calculate the test statistic and find its corresponding approximate \(P\)-value for the difference in population proportions of prospective Liberal Democrat voters among those who did and did not watch the leaders’ debates. Show your working. State the conclusion from the test.
Calculate a 95% confidence interval around this difference. State its lower and upper limits.
Write a brief substantive interpretation of your results.
The data set BES2010pre_lastdebate.sav contains responses to the same question - whether respondents intended to vote Liberal Democrat or not - but asked before the last of the party leaders’ debates. Repeat the analysis you carried out for the first class exercise, but using this data set. In other words carry out a one-sample analysis, of the kind done in exercise 1 above, to compare the proportion of respondents who said they intended to vote Liberal Democrat with the proportion who actually did. Answer the following questions:
+State the null hypothesis for the test.
Calculate the test statistic and find its corresponding approximate \(P\)-value. Show your workings.
Give a brief interpretation of the results. Do they differ from the other data set? Can you think of any reasons for this? (This last question invites some speculation - do not worry if you don’t have any ideas! But see the sample answer if you are interested in our speculation.)
Data set: File decathlon2012.sav.
+A scatterplot is obtained from Graphs/Legacy Dialogs/“Scatter/Dot”/ Simple Scatter/Define. The variables for the \(X\)-axis and \(Y\)-axis are placed in the X Axis and Y Axis boxes respectively. Double-clicking on the plot in the Output window opens it in a Chart Editor, where various additions to the graph can be made. A fitted straight line is added from Elements/Fit Line at Total. A least squares fitted line is the default under this option, so it is drawn immediately and you can just click Close. Closing the Chart Editor commits the changes to the Output window.
A correlation matrix is obtained from Analyze/Correlate/Bivariate, when Correlation Coefficients/Pearson is selected (which is the default, so you should not need to change it). The variables included in the correlation matrix are placed into the Variables box. The output also includes a test for the hypothesis that the population correlation is 0, but we will ignore it.
Linear regression models are obtained from Analyze/Regression/Linear. The response variable is placed under Dependent and the explanatory variable under Independent(s). The dialog box has many options for various additional choices. Today you can leave all of them at their default values, except that you should select Statistics/Regression Coefficients/Confidence intervals to include also 95% confidence intervals for the regression coefficients in the output.
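Approximate syntax equivalents of these three procedures, as a sketch using variables from today’s data set, are:

* Scatterplot of points against the 100-metre result.
GRAPH /SCATTERPLOT(BIVAR)=MARK_100M WITH POINTS_100M.
* Correlation matrix for selected variables.
CORRELATIONS /VARIABLES=POINTS_100M POINTS_LONGJUMP POINTS_TOTAL.
* Simple linear regression of the total score on the 100-metre points, with confidence intervals.
REGRESSION
  /STATISTICS COEFF OUTS R ANOVA CI
  /DEPENDENT POINTS_TOTAL
  /METHOD=ENTER POINTS_100M.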
Decathlon is a sport where the participants complete ten different athletics events over two days. Their results in each are then translated into points, and the winner is the competitor with the highest points total for the ten events. The file decathlon2012.sav contains the results of the decathlon competition at the 2012 Olympics in London for the 26 athletes who finished the competition.22 The results for each event are given both in their original units (variables with names beginning with “mark_”) and in decathlon points (names beginning with “points_”). The ten events are identified by the variable labels in Variable View. The variable points_total gives the final points total for each competitor.
+Create a scatterplot between the result (\(X\)-axis) and points (\(Y\)-axis) for one event, the 100-metre sprint (variables MARK_100M and POINTS_100M), and add a fitted line. This simply provides information on the calculation used to transform the result into points. Clearly a linear calculation is used for this, at least over the range of results in these data. Notice the downward slope of the line: the faster the result, the higher the number of points. From now on, for simplicity we will consider only the points variables for each event.
Obtain the correlation matrix for all pairs of variables among the ten individual points scores and the total score. Consider first correlations between the individual events only. Which correlations tend to be high (say over 0.5), which ones close to zero and which ones even negative? Can you think of any reasons for this? Draw scatterplots and fitted lines for a few pairs of variables with different sizes of correlations (here the variables are treated symmetrically, so it does not matter which one is placed on the \(X\)-axis). Can these associations be reasonably described as linear?
Consider now the correlations between the ten event scores and the final score POINTS_TOTAL. Which of them is highest, and which one lowest? Examine the scatterplot and fitted line between points for 100 metres (POINTS_100M) and the total score (POINTS_TOTAL). Fit a linear regression model to these variables, with POINTS_100M as the explanatory variable. Interpret the results. Does there appear to be an association between the points for 100 metres and the total score? What is the nature of the association?
+Suppose you were told that a competitor received 800 points (a time of about 11.3 seconds) for 100 metres, the first event of the decathlon. Based on the fitted model, what final points score would you predict for him? You can calculate this fitted value with a pocket calculator. What would be the predicted value if the 100-metre score was 950 points (about 10.6 s) instead?
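The prediction is obtained by substituting the value into the fitted regression equation from the SPSS output, i.e. \[\widehat{\mbox{total score}} = \hat{\alpha}+\hat{\beta}\times 800,\] where \(\hat{\alpha}\) is the estimated intercept (labelled “Constant” in the Coefficients table) and \(\hat{\beta}\) is the estimated slope for POINTS_100M; for the second question use 950 in place of 800. (The symbols \(\hat{\alpha}\) and \(\hat{\beta}\) are generic notation here; use whatever notation the coursepack uses for the estimated coefficients.)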
HOMEWORK
+Briefly discuss the correlation matrix produced in the class. Pick out a few examples for illustration - which correlations are highest, and which ones lowest, and which ones negative? You may comment on correlations between individual events, as well as on correlations between the final score and individual events.
Obtain the scatterplot and linear regression model for the total score given points for the long jump, one of the field events (POINTS_LONGJUMP). Is the score for long jump strongly or weakly associated with the final score? Interpret the slope coefficient. Suppose you were told that a competitor received 900 points (a jump of about 7.4 metres) for the long jump. Based on the fitted model, what final points score would you predict for him?
Obtain the scatterplot and linear regression model for the total score given points for throwing the discus, another of the field events (POINTS_DISCUS). Interpret the slope coefficient. Is the score for discus strongly or weakly associated with the final score?
Data set: File GSS2010.sav. This contains a selection of variables on attitudes and demographic characteristics for 2044 respondents in the 2010 U.S. General Social Survey (GSS)23. Only a few of the variables are used in the exercises.
+Here we will focus on the variables EDUC, PAEDUC, MAEDUC and SPEDUC. These show the number of years of education completed by, respectively, the survey respondent him/herself, and the respondent’s father, mother and spouse.
+Obtain basic descriptive statistics for the variables. Here they can be compared directly, because the meaning of the variable is similar in each case. We can even draw side-by-side box plots for the variables (rather than for values of a single variable at different levels of another, as before). These can be obtained from Analyze/Descriptive Statistics/Explore by placing all the variables under Dependent List and selecting Plots/Boxplots/Dependents together. You should then also select Options/Missing Values/Exclude cases pairwise to include all non-missing values for each variable (here SPEDUC has for obvious reasons more missing values than the others).
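A rough syntax sketch of this Explore request is:

* Side-by-side box plots of the four education variables, using pairwise exclusion of missing values.
EXAMINE VARIABLES=EDUC PAEDUC MAEDUC SPEDUC
  /COMPARE=VARIABLES
  /PLOT=BOXPLOT
  /STATISTICS=DESCRIPTIVES
  /MISSING=PAIRWISE.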
Obtain the correlation matrix of the four variables. Which correlations are highest, and which ones lowest?
Draw a scatterplot with fitted line for EDUC given PAEDUC. Fit a linear regression model between these variables, regressing EDUC (response variable) on PAEDUC (explanatory variable). Interpret the results. Is there a statistically significant linear association between a person’s years of schooling and those of his/her father? Interpret the estimated regression coefficient, \(t\)-statistic and \(P\)-value, and 95 per cent confidence interval.
Based on the fitted model, what is the predicted number of years of education for a respondent whose father completed 12 years of education?
The homework exercise uses the same data set for two different types of analysis.
Draw a scatterplot with fitted line for EDUC given MAEDUC. Fit a linear regression model between these variables, regressing EDUC (response variable) on MAEDUC (explanatory variable).
+Interpret the results: Is there a statistically significant linear association between a person’s years of schooling and those of his/her mother? Interpret the estimated regression coefficient, \(t\)-statistic and \(P\)-value, and 95 per cent confidence interval.
Based on the fitted model, what is the predicted number of years of education for a respondent whose mother completed 10 years of education?
Interpret the R-squared statistic for the model.
Three-way contingency tables are again obtained from Analyze/Descriptive Statistics/Crosstabs. The only change from Week 4 class is that the conditioning variable is now placed in the Layer 1 of 1 box. This produces a series of partial two-way tables between the row and column variables specified in the Row(s) and Column(s) boxes, one for each category of the Layer variable. Percentages and \(\chi^{2}\) test are similarly calculated separately for each partial table. For this example we elaborate on the first two exercises from Week 4 class. To remind you, the categorical variables we are analysing are these:
+The respondent’s sex, recorded as the variable SEX.
The respondent’s age, recorded as AGEGROUP in three groups: 18-34, 35-54 and 55 or over.
FEFAM: Level of agreement with the following statement: “It is much better for everyone involved if the man is the achiever outside the home and the woman takes care of the home and family”, with response options Strongly agree, Agree, Disagree, and Strongly disagree.
First remind yourself of the associations between SEX and FEFAM and between AGEGROUP and FEFAM. Obtain the two-way contingency table between FEFAM and SEX, including appropriate percentages and \(\chi^{2}\) test of independence. Repeat the procedure for FEFAM by AGEGROUP. What do you learn about the associations between attitude and sex, and between attitude and age?
Sociologists would suggest that the relationship between sex and attitude towards male and female work roles might be different for different age groups. In other words, age might modify the association between sex and attitude. Investigate this possible interaction between the three variables. Create a three-way table where FEFAM is the column variable, SEX the row variable and AGEGROUP the layer (conditioning) variable. Study the SPSS output, and make sure you understand how this shows three partial tables of FEFAM vs. SEX, one for each possible value of AGEGROUP. Examine and interpret the associations in the three partial tables. State the results of the \(\chi^{2}\) test for each partial table, and illustrate your interpretations with some appropriate percentages. Finally, summarise your findings: are there differences in the nature, strength or significance of the association between sex and attitude, depending on the age group? Comment on how this interpretation differs from the initial two-way table of FEFAM and SEX.
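In syntax, the three-way table requested in the exercise above corresponds roughly to adding the layer variable after a second BY keyword:

* Partial tables of FEFAM by SEX within each category of AGEGROUP, with row percentages and chi-square tests.
CROSSTABS
  /TABLES=SEX BY FEFAM BY AGEGROUP
  /CELLS=COUNT ROW
  /STATISTICS=CHISQ.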
Data set: File humandevelopment2011.sav.
+Multiple linear regression is obtained from Analyze/Regression/Linear, by placing all of the required explanatory variables in the Independent(s) box. No other changes from last week are required.
To include categorical explanatory variables, the necessary dummy variables have to be created first. The ones for today’s class are already included in the data set. If you need to create dummy variables for your own analyses in the future, it is usually easiest to do so from Transform/Compute Variable. Some of the buttons on the keypad shown in that dialog box are logical operators for defining conditions for which the outcome is either 1 (True) or 0 (False), as required by a dummy variable. For example, the categorical variable INCOME_GROUP in today’s data set has the value 3 if the country is in the high income group. The dummy variable HIGH_INCOME was created from this by entering Target Variable: HIGH_INCOME and Numeric Expression: INCOME_GROUP=3. This means that the new variable HIGH_INCOME will have the value 1 for countries for which INCOME_GROUP is equal to 3, and will be 0 otherwise. Other logical operators may also be used: for example, urban_pop \(<\) 50 would produce 1 if the variable URBAN_POP was less than 50 and 0 otherwise.
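A sketch of the corresponding syntax, both for the dummy-variable calculation described above and for a multiple regression of the kind fitted in today’s exercises:

* Dummy variable equal to 1 for high-income countries and 0 otherwise.
COMPUTE HIGH_INCOME = (INCOME_GROUP = 3).
EXECUTE.
* Multiple linear regression with several explanatory variables, with confidence intervals.
REGRESSION
  /STATISTICS COEFF OUTS R ANOVA CI
  /DEPENDENT SCHOOL_YEARS
  /METHOD=ENTER URBAN_POP GOVERNANCE MIDDLE_INCOME HIGH_INCOME.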
The file humandevelopment2011.sav contains data on a number of indicators of what might broadly be called development, for 194 countries in 2011. These were collated from two international data agency sources24. The response variable considered today is SCHOOL_YEARS, which records for each country the mean number of years of schooling taken by the adult population. We treat it here as a general indicator of the educational situation in a country, which is an important aspect of development. We will consider the following explanatory variables for it:
URBAN_POP: the degree of urbanisation of the country, specifically the percentage of the country’s population living in urban areas.
GOVERNANCE, a continuous variable constructed from expert opinion surveys to reflect the perceived effectiveness of government in delivering services.
INFANT_MORTALITY, number of infants dying before 1 year old, per 1,000 live births — a “proxy” indicator representing the health of the population
INCOME_GROUP, classified as low, middle or high income economies. This is also provided in the form of three dummy variables: LOW_INCOME, MIDDLE_INCOME and HIGH_INCOME.
Obtain some descriptive statistics for the continuous variables, to gain an impression of their ranges. A quick way of doing this is via Analyze/Descriptive Statistics/Frequencies, unchecking the “Display frequency tables” option and requesting minimum and maximum values.
Investigate the idea that increased urbanisation is linked to greater availability of schooling for people. Obtain a scatterplot and a simple linear regression model for SCHOOL_YEARS given URBAN_POP. What do you observe in the scatterplot? Interpret the regression output.
Now consider the possibility that schooling may also be explained by the effectiveness of governments in providing public services (such as education). Fit a multiple linear regression model for SCHOOL_YEARS given both URBAN_POP and GOVERNANCE. Compare the estimated coefficient of URBAN_POP for this model with the coefficient of the same variable in the model in Question 2. What do you conclude? Does the association between schooling and urbanisation change when we control for government effectiveness? If so, in what way? Interpret the estimated coefficient of GOVERNANCE in the fitted model, the results of its \(t\)-test and its 95% confidence interval.
Next consider the possible explanatory value of the income wealth of a country for understanding variation in schooling years. Include income by entering two of the three dummy variables for income group. For the most convenient interpretation, we suggest that you leave “low income” as the reference group, and enter the dummies for MIDDLE_INCOME and HIGH_INCOME in the model. Interpret the values of the estimated regression coefficients for the two income dummy variables. In addition, for each one state the null hypothesis for its \(t\)-test, and interpret the result of the test and 95% confidence intervals.
Using this model, what level of schooling would you predict for a country with 70% urban population, a score of 1.5 on governance, and a high income economy?
Using this model, what level of schooling would you predict for a country with 30% urban population, a score of -0.2 on governance, and a low income economy?
HOMEWORK
+Write up your answers to the last three questions in the class exercise.
Finally, consider one more possible explanatory variable: INFANT_MORTALITY. Add this variable to the multiple linear regression model fitted above. Is it statistically significant, at the 1% level of significance? Interpret the value of its estimated coefficient, and its 95% confidence interval. Take care to make sense of the sign (positive or negative) of the coefficient.
Has the inclusion of INFANT_MORTALITY modified the interpretation of any of the other explanatory variables in the model? Are they all statistically significant, at the 5% level of significance? Briefly outline the similarities and differences between the results for this final model and the model fitted in the class exercise.
Data set: File ESS5GB_trust.sav.
+This class is for you to revisit any topics of your choosing. Make the most of the opportunity to ask your class teachers any questions you have about any of the course material, and to practise any of the analyses you have learned during the course.
+As an optional exercise, the data file ESS5GB_trust.sav is provided. This contains a selection of variables from the survey of British respondents that forms the 2010 wave of the European Social Survey25.
+We suggest that you use the data to practise multiple linear regression modelling on one or more of the variables capturing people’s levels of trust in institutions. For these questions, respondents were asked the following: “Using this card, please tell me on a score of 0-10 how much you personally trust each of the institutions I read out. 0 means you do not trust an institution at all, and 10 means you have complete trust.” The institutions (and their variable names) are:
+trstprl: Trust in country’s parliament
trstlgl: Trust in the legal system
trstplc: Trust in the police
trstplt: Trust in politicians
trstprt: Trust in political parties
trstep: Trust in the European Parliament
trstun: Trust in the United Nations
After you choose a response variable that interests you, you will need to select some potential explanatory variables to test. The data set contains a number of variables. Some are socio-demographic, such as age and gender. Some are attitudinal or behavioural, such as amount of time spent reading newspapers. You will need to make a judgement about the levels of measurement of the variables, and how to enter them into the model. Use the “Values” column in the SPSS Variable View to check how each variable is coded. Note: we suggest that it is not too much of a compromise to treat the variables on television, radio and newspaper consumption as continuous, interval level variables. Note also: we have provided dummy variables for the categorical variables in the data set.
HOMEWORK

As this is the last week of the course, there is no homework. You can find further information on this and the other class exercises and homeworks in the model answers, which will be posted on the Moodle site.

ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway – Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
The data can be obtained from http://www3.norc.org/gss+website/, which gives further information on the survey, including the full text of the questionnaires.↩

ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway – Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
Strictly speaking, the analysis should incorporate sampling weights (variable DWEIGHT) to adjust for different sampling probabilities for different types of respondents. Here the weights are ignored. Using them would not change the main conclusions for these variables.↩
The data can be obtained from http://bes2009-10.org/, which gives further information on the survey, including the full text of the questionnaires. The data analysed in this class and homework are from the BES Campaign Internet Panel Survey, which has been divided into two data sets corresponding to two time periods leading up to the General Election.↩

Official results obtained from www.olympic.org/london-2012-summer-olympics.↩

The data can be obtained from www3.norc.org/GSS+Website/, which gives further information on the survey, including the full text of the questionnaires.↩

United Nations Development Programme International Human Development Indicators, http://hdr.undp.org/en/data/; World Bank Worldwide Governance Indicators, http://info.worldbank.org/governance/wgi/pdf/wgidataset.xlsx; World Bank World Development Indicators, http://data.worldbank.org/indicator/SP.DYN.IMRT.IN.↩

ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 2.0. Norwegian Social Science Data Services, Norway – Data Archive and distributor of ESS data. The full data can be obtained from http://ess.nsd.uib.no/ess/round5/.↩
This chapter serves both as an explanation of some topics that were skimmed over previously, and as preparation for later chapters. Its central theme is probability distributions of continuous variables. These may appear in two distinct roles:

As population distributions of continuous variables, for instance blood pressure in the illustrative example of this chapter. This contrasts with the kinds of discrete variables that were considered in Chapters 4 and 5. Methods of inference for continuous variables will be introduced in Chapters 7 and 8.

As sampling distributions of sample statistics. These are typically continuous even in analyses of discrete variables, such as in Chapter 5 where the variable of interest \(Y\) was binary but the sampling distributions of a sample proportion \(\hat{\pi}\) and the \(z\)-test statistic for population probability \(\pi\) were nevertheless continuous. We have already encountered two continuous distributions in this role, the \(\chi^{2}\) distributions in Chapter 4 and the standard normal distribution in Chapter 5. Their origins are explained in more detail below.

To illustrate the concepts, we use data from the Health Survey for England 2002 (HES).24 One part of the survey was a short physical examination by a nurse. Figure 6.1 shows a histogram and frequency polygon of diastolic blood pressure (in mm Hg) for 4489 respondents, measured by the mean of the last two of three measurements taken during the examination. Data from respondents for whom the measurements were not obtained or were considered invalid have been excluded. Respondents aged under 25 have also been excluded for simplicity, because this age group was oversampled in the 2002 HES.

The respondents whose blood pressures are summarized in Figure 6.1 are in reality a sample from a larger population in the sense of Sections 3.2 and 3.3. However, for illustrative purposes we will here pretend that they are actually an entire finite population of 4489 people (the adults in a small town, say). The values summarised in Figure 6.1 then form the population distribution of blood pressure in this population. It is clear that blood pressure is best treated as a continuous variable.
If we knew all of its values, we could summarise a finite population distribution by, say, a histogram like Figure 6.1. We can also consider specific characteristics of the distribution, i.e. its parameters in the sense introduced in Section 5.3. For the distribution of a continuous variable, the most important parameters are the population mean
\[\begin{equation}
\mu=\frac{\sum Y_{i}}{N}
\tag{6.1}
\end{equation}\]
and the population variance
\[\begin{equation}
\sigma^{2} = \frac{\sum (Y_{i}-\mu)^{2}}{N}
\tag{6.2}
\end{equation}\]
or, instead of the variance, the population standard deviation
\[\begin{equation}
\sigma = \sqrt{\frac{\sum (Y_{i}-\mu)^{2}}{N}}.
\tag{6.3}
\end{equation}\]
Here \(\mu\) and \(\sigma\) are the lower-case Greek letters “mu” and “sigma” respectively, and \(\sigma^{2}\) is read “sigma squared”. It is common to use Greek letters for population parameters, as we did also for the probability parameter \(\pi\) in Chapter 5.

In (6.1)–(6.3), \(N\) is the number of units in a finite population and the sums indicated by \(\Sigma\) are over all of these \(N\) units. If we treat the data in Figure 6.1 as a population, \(N=4489\) and these population parameters are \(\mu=74.2\), \(\sigma^{2}=127.87\) and \(\sigma=11.3\).

Because the formulas (6.1)–(6.3) involve the population size \(N\), they apply in this exact form only to finite populations like the one in this example (and as discussed more generally in Section 3.2) but not to infinite ones of the kind discussed in Section 3.4. However, the definitions of \(\mu\), \(\sigma^{2}\), \(\sigma\) and other parameters can be extended to apply also to infinite populations. These definitions, which will be omitted here, involve the concept of continuous probability distributions that is discussed in the next section. The interpretations of the population parameters turn out to be intuitively similar for both the finite and infinite-population cases, and the same methods of analysis apply to both, so we can here ignore the distinction without further comment.

The population formulas (6.1)–(6.3) clearly resemble those of some sample statistics introduced in Chapter 2, specifically the sample mean, variance and standard deviation
\[\begin{equation}
\bar{Y}=\frac{\sum Y_{i}}{n},
\tag{6.4}
\end{equation}\]
\[\begin{equation}
s^{2} = \frac{\sum (Y_{i}-\bar{Y})^{2}}{n-1}
\tag{6.5}
\end{equation}\]
and
\[\begin{equation}
s = \sqrt{\frac{\sum (Y_{i}-\bar{Y})^{2}}{n-1}}
\tag{6.6}
\end{equation}\]
where the sums are now over the \(n\) observations in a sample. These can be used as descriptions of the sample distribution as discussed in Chapter 2, but also as point estimates of the corresponding population parameters in the sense defined in Section 5.4. We may thus use the sample mean \(\bar{Y}\) as a point estimate of the population mean \(\mu\), and the sample variance \(s^{2}\) and sample standard deviation \(s\) as point estimates of population variance \(\sigma^{2}\) and standard deviation \(\sigma\) respectively. These same estimates can be used for both finite and infinite population distributions.

For further illustration of the connection between population and sample quantities, we have also drawn a simple random sample of \(n=50\) observations from the finite population of \(N=4489\) observations in Figure 6.1. Table 6.1 shows the summary statistics (6.4)–(6.6) in this sample and the corresponding parameters (6.1)–(6.3) in the population.
|            | Size       | Mean             | Standard Deviation | Variance              |
|------------|------------|------------------|--------------------|-----------------------|
| Population | \(N=4489\) | \(\mu=74.2\)     | \(\sigma=11.3\)    | \(\sigma^{2}=127.87\) |
| Sample     | \(n=50\)   | \(\bar{Y}=72.6\) | \(s=12.7\)         | \(s^{2}=161.19\)      |
You may have noticed that the formulas of the sample variance (6.5) and sample standard deviation (6.6) involve the divisor \(n-1\) rather than the \(n\) which might seem more natural, while the population formulas (6.2) and (6.3) do use \(N\) rather than \(N-1\). The reason for this is that using \(n-1\) gives the estimators certain mathematically desirable properties (\(s^{2}\) is an unbiased estimate of \(\sigma^{2}\), but \(\hat{\sigma}^{2}\) below is not). This detail need not concern us here. In fact, the statistics which use \(n\) instead, i.e.
\[\begin{equation}
\hat{\sigma}^{2}=\frac{\sum (Y_{i}-\bar{Y})^{2}}{n}
\tag{6.7}
\end{equation}\]
for \(\sigma^{2}\) and \(\hat{\sigma}=\sqrt{\hat{\sigma}^{2}}\) for \(\sigma\), are also sensible estimates and very similar to \(s^{2}\) and \(s\) unless \(n\) is very small. In general, there are often several possible sample statistics which could be used as estimates for the same population parameter.
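If you want to see the difference between the two divisors concretely, the following short sketch does the calculation outside SPSS (Python with numpy, used here purely for illustration; the data values are made up):

```python
import numpy as np

# A small made-up sample of blood pressure readings (illustrative only)
y = np.array([68.0, 75.0, 81.0, 72.0, 70.0, 79.0, 66.0, 85.0])
n = len(y)

s2 = y.var(ddof=1)          # divisor n - 1: the sample variance s^2 of (6.5)
sigma2_hat = y.var(ddof=0)  # divisor n: the alternative estimate (6.7)

# The two estimates differ only by the factor (n - 1) / n
print(n, s2, sigma2_hat, s2 * (n - 1) / n)
```

With only \(n=8\) observations the difference is noticeable; for larger \(n\) the two estimates become nearly identical.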
Thinking about population distributions of continuous variables using, say, histograms as in Figure 6.1 would present difficulties for statistical inference, for at least two reasons. First, samples cannot in practice give us enough information to make reliable inferences on all the details of a population distribution, such as the small kinks and bumps of Figure 6.1. Such details would typically not even be particularly interesting compared to major features like the central tendency and variation of the population distribution. Second, this way of thinking about the population distribution is not appropriate when the population is regarded as infinite.

Addressing both of these problems requires one more conceptual leap. This is to make the assumption that the population distribution is well-represented by a continuous probability distribution, and focus on inference on the parameters of that distribution.

We have already introduced the concept of probability distributions in Section 3.5, and considered instances of it in Chapters 4 and 5. There, however, the term was not emphasised because it added no crucial insight into the methods of inference. This was because for discrete variables a probability distribution is specified simply by listing the probabilities of all the categories of the variable. The additional terminology of probability distributions and their parameters seems almost redundant in that context.

The situation is very different for continuous variables. This is illustrated by Figure 6.2, which shows the same frequency polygon as in Figure 6.1, now supplemented by a smooth curve. This curve (“a probability density function”) describes a particular probability distribution. It can be thought of as a smoothed version of the shape of the frequency polygon. What we will do in the future is to use some such probability distribution to represent the population distribution. This means effectively arguing that we believe that the shape of the true population distribution is sufficiently regular to be well described by a smooth curve such as the one in Figure 6.2.

In Figure 6.2 the curve and the frequency polygon have reasonably similar shapes, so the assumption that the former is a good representation of the latter does not seem far-fetched. However, the two are clearly not exactly the same, nor do we expect that even the blood pressures of all English adults exactly match this curve or any other simple probability distribution. All we require is that a population distribution is close enough to a specified probability distribution for the results from analyses based on this assumption to be meaningful and not misleading.

Such a simplifying assumption about the population distribution is known as a statistical model for the population. The reason for working with a model is that it leads to much simpler methods of analysis than would otherwise be required. For example, the shape of the distribution shown in Figure 6.2 is entirely determined by just two parameters, its mean and variance. Under this model, all questions about the population distribution can thus be reduced to questions about these two population parameters, and inference can focus on tests and confidence intervals for them.

The potential cost of choosing a specific probability distribution as the statistical model for a particular application is that the assumption may be inappropriate for the data at hand, and if it is, conclusions about population parameters derived from analyses based on this assumption may be misleading. The distribution should thus be chosen carefully, usually based on both substantive considerations and initial descriptive examination of the observed data.

For example, the particular probability distribution shown in Figure 6.2, which is known as the normal distribution, is by definition symmetric around its mean. While it is an adequate approximation of many approximately symmetric population distributions of continuous variables, such as that of blood pressure, many other population distributions are not even roughly symmetric. It would be unrealistic to assume the population distributions of such variables to be normal. Instead, we might consider other continuous probability distributions which can be skewed. Examples of these are the Exponential, Gamma, Weibull, and Beta distributions. Discrete variables, of course, will require quite different probability distributions, such as the Binomial distribution discussed in Chapter 5, or the Multinomial and Poisson distributions. On this course, however, we will not include further discussion of these various possibilities.
The particular probability distribution that is included in Figure 6.2 is a normal distribution, also known as the Gaussian distribution, after the great German mathematician Karl Friedrich Gauss who was one of the first to derive it in 1809. Figure 6.3 shows a portrait of Gauss from the former German 10-DM banknote, together with pictures of the university town of Göttingen and of the normal curve (even the mathematical formula of the curve is engraved on the note). The curve of the normal distribution is also known as the “bell curve” because of its shape.

The normal distribution is by far the most important probability distribution in statistics. The main reason for this is its use as a sampling distribution in a wide range of contexts, for reasons that are explained in Section 6.4. However, the normal distribution is also useful for describing many approximately symmetric population distributions, and it is in this context that we introduce its properties first.

A normal distribution is completely specified by two numbers, its mean (or “expected value”) \(\mu\) and variance \(\sigma^{2}\). This is sometimes expressed in notation as \(Y\sim N(\mu, \sigma^{2})\), which is read as “\(Y\) is normally distributed with mean \(\mu\) and variance \(\sigma^{2}\)”. Different values for \(\mu\) and \(\sigma^{2}\) give different distributions. For example, the curve in Figure 6.2 is that of the \(N(74.2, \, 127.87)\) distribution, where the mean \(\mu=74.2\) and variance \(\sigma^{2}=127.87\) are the same as the mean and variance calculated from formulas (6.1) and (6.2) for the 4489 observations of blood pressure. This ensures that this particular normal curve best matches the frequency polygon in Figure 6.2.

The mean \(\mu\) describes the central tendency of the distribution, and the variance \(\sigma^{2}\) its variability. This is illustrated by Figure 6.4, which shows the curves for three different normal distributions. The mean of a normal distribution is also equal to both its median and its mode. Thus \(\mu\) is the central value in the sense that it divides the distribution into two equal halves, and it also indicates the peak of the curve (the highest probability, as discussed below). In Figure 6.4, the curves for \(N(0, 1)\) and \(N(0, 9)\) are both centered around \(\mu=0\); the mean of the \(N(5, 1)\) distribution is \(\mu=5\), so the whole curve is shifted to the right and centered around 5.

The variance \(\sigma^{2}\) determines how widely spread the curve is. In Figure 6.4, the curves for \(N(0, 1)\) and \(N(5, 1)\) have the same variance \(\sigma^{2}=1\), so they have the same shape in terms of their spread. The curve for \(N(0, 9)\), on the other hand, is more spread out, because it has a higher variance of \(\sigma^{2}=9\). As before, it is often more convenient to describe variability in terms of the standard deviation \(\sigma\), which is the square root of the variance. Thus we may also say that the \(N(0, 9)\) distribution has the standard deviation \(\sigma=\sqrt{9}=3\) (for \(\sigma^{2}=1\) the two numbers are the same, since \(\sqrt{1}=1\)).

In the histogram in Figure 6.1, the heights of the bars correspond to the proportions of different ranges of blood pressure among the 4489 people in the data set. Another way of stating this is that if we were to sample a person from this group at random, the heights of the bars indicate the probabilities that the selected person’s blood pressure would be in a particular range. Some values are clearly more likely than others. For example, for blood pressures in the range 50–51.5, the probability is about 0.0025, corresponding to a low bar, while for the range 74–75.5 it is about 0.0365, corresponding to a much higher bar.

The interpretation is the same for the curve of a continuous probability distribution. Its height also indicates the probability of different values in random sampling from a population with that distribution. More precisely, the areas under the curve give such probabilities for ranges of values. Probabilities of all the possible values must add up to one, so the area under the whole curve is one — i.e. a randomly sampled unit must have some value of the variable in question. More generally, the area under the curve for a range of values gives the probability that the value of a randomly sampled observation is in that range. These are the same principles that we have already used to derive \(P\)-values for tests in Sections 4.3.5 and 5.5.3.

Figure 6.5 illustrates this further with some results which hold for any normal distribution, whatever its mean and variance. The grey area in the figure corresponds to values from \(\mu-\sigma\) to \(\mu+\sigma\), i.e. those values which are no further than one standard deviation from the mean. The area of the grey region is 0.68, so the probability that a randomly sampled value from a normal distribution is within one standard deviation of the mean is 0.68. The two shaded regions either side of the grey area extend the area to 1.96 standard deviations below and above the mean. The probability of this region (the grey and shaded areas together) is 0.95. Rounding the 1.96 to 2, we can thus say that approximately 95% of observations drawn from a normal distribution tend to be within two standard deviations of the mean. This leaves the remaining 5% in the two tails of the distribution, further than 1.96 standard deviations from the mean (the two white areas in Figure 6.5). Because the normal distribution is symmetric, these two areas are of equal size and each thus has the probability 0.025 (i.e. 0.05/2).
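These benchmark figures can be checked with any software that evaluates normal probabilities. A minimal sketch in Python (scipy is used here only as an illustration; the same numbers can be obtained from the table in the Appendix or from SPSS):

```python
from scipy.stats import norm

# Probability within one standard deviation of the mean (about 0.68)
within_1sd = norm.cdf(1) - norm.cdf(-1)

# Probability within 1.96 standard deviations of the mean (about 0.95)
within_196sd = norm.cdf(1.96) - norm.cdf(-1.96)

# Probability in each tail beyond 1.96 standard deviations (about 0.025)
upper_tail = 1 - norm.cdf(1.96)

print(within_1sd, within_196sd, upper_tail)
```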
Such calculations can also be used to determine probabilities in particular examples. Returning to the blood pressure data, we might for example be interested in

the proportion of people in some population whose diastolic blood pressure is higher than 90 (one possible cut-off point for high blood pressure or hypertension)

the proportion of people with diastolic blood pressure below 60 (possibly indicating unusually low blood pressure or hypotension)

the proportion of people in the normal pressure range of 60–90

Such figures might be of interest for example for predicting health service needs for treating hypertension. Suppose that we were reasonably confident (perhaps from surveys like the one described above) that the distribution of diastolic blood pressure in the population of interest was approximately normally distributed with mean 74.2 and variance 127.87 (and thus standard deviation 11.3). The probabilities of interest are then the areas of the regions shown in Figure 6.6.

The remaining question is how to calculate such probabilities. The short answer is “with a computer”. However, to explain an approach which is required for this in some computer packages and also to provide an alternative method which does not require a computer, we need to introduce one more new quantity. This is the Z score, which is defined as
\[\begin{equation}
Z = \frac{Y-\mu}{\sigma}
\tag{6.8}
\end{equation}\]
where \(Y\) can be any value of the variable of interest. For example, in the blood pressure example the \(Z\) scores corresponding to values 60 and 90 are \(Z=(60-74.2)/11.3=-1.26\) and \(Z=(90-74.2)/11.3=1.40\) respectively. The \(Z\) score can be interpreted as the distance of the value \(Y\) from the mean \(\mu\), measured in standard deviations \(\sigma\). Thus the blood pressure 60, with a \(Z\) score of \(-1.26\), is 1.26 standard deviations below (hence the negative sign) the mean, while 90 (with \(Z\) score 1.40) is 1.40 standard deviations above the mean.

The probability distribution of the \(Z\) scores is a normal distribution with mean 0 and variance 1, i.e. \(Z\sim N(0,1)\). This is known as the standard normal distribution. The usefulness of \(Z\) scores lies in the fact that by transforming the original variable \(Y\) from the \(N(\mu, \sigma^{2})\) distribution into the standard normal distribution they remove the specific values of \(\mu\) and \(\sigma\) from the calculation. With this trick, probabilities for any normal distribution can be calculated using a single table for \(Z\) scores. Such a table is given in the Appendix, and an extract from it is shown in Table 6.2 (note that it is not always presented exactly like this, as different books may use slightly different format or notation). The first column lists values of the \(Z\) score (a full table would typically give all values from 0.00 to about 3.50). The second column, labelled “Tail Prob.”, gives the probability that a \(Z\) score for a normal distribution is larger than the value given by \(z\), i.e. the area of the region to the right of \(z\).
| \(z\) | Tail Prob. |
|-------|------------|
| …     | …          |
| 1.24  | 0.1075     |
| 1.25  | 0.1056     |
| 1.26  | 0.1038     |
| 1.27  | 0.1020     |
| …     | …          |
| 1.38  | 0.0838     |
| 1.39  | 0.0823     |
| 1.40  | 0.0808     |
| 1.41  | 0.0793     |
| …     | …          |
Consider first the probability that blood pressure is greater than 90, i.e. the area labelled “High” in Figure 6.6. We have seen that 90 corresponds to a \(Z\) score of 1.40, so the probability of high blood pressure is the same as the probability that the normal \(Z\) score is greater than 1.40. The row for \(z=1.40\) in the table tells us that this probability is 0.0808, or 0.08 when rounded to two decimal places as in Figure 6.6.

The second quantity of interest was the probability of a blood pressure at most 60, i.e. the area of the “Low” region in Figure 6.6. The corresponding \(Z\) score is \(-1.26\). The table, however, shows only positive values of \(z\). This is because we can use the symmetry of the normal distribution to reduce all such questions to ones about positive values of \(z\). Because the distribution is symmetric, the probability that a \(Z\) score is at most \(-1.26\) (the area of the left-hand tail to the left of \(-1.26\)) is the same as the probability that it is at least 1.26 (the area of the right-hand tail to the right of 1.26). This is the kind of quantity we calculated above.25 The required probability is thus equal to the right-hand tail probability for 1.26, which the table shows to be 0.1038 (rounded to 0.10 in Figure 6.6).

Finally, the probability of the “Mid” range of blood pressure is the remaining probability not in the two other regions. Because the whole area under the curve (the total probability) is 1, the required probability is obtained by subtraction as \(1-(0.0808+0.1038)=0.8154\). In this example these values obtained from the normal approximation of the population distribution are very accurate. The exact proportions of the 4489 respondents who had diastolic blood pressure at most 60 or greater than 90 were 0.0996 and 0.0793 respectively, so rounded to two decimal places they were the same as the 0.10 and 0.08 obtained from the normal approximation.
These days we can use statistical computer programs to calculate such probabilities directly for a normal distribution with any mean and standard deviation. For example, SPSS has a function called CDF.NORMAL(quant,mean,stddev) for this purpose. It calculates the probability that the value from a normal distribution with mean mean and standard deviation stddev is at most quant.
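As an aside for readers who use other software, the same probabilities can be computed outside SPSS as well. A minimal sketch in Python with scipy (illustration only, not part of the course software):

```python
from scipy.stats import norm

mu, sigma = 74.2, 11.3   # population mean and standard deviation of blood pressure

p_low = norm.cdf(60, loc=mu, scale=sigma)        # P(Y <= 60), about 0.10
p_high = 1 - norm.cdf(90, loc=mu, scale=sigma)   # P(Y > 90),  about 0.08
p_mid = 1 - p_low - p_high                       # P(60 < Y <= 90), about 0.82

print(p_low, p_high, p_mid)
```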
In practice we do not usually know the population mean and variance, so their sample estimates will be used in such calculations. For example, for the sample in Table 6.1 we had \(\bar{Y}=72.6\) and \(s=12.7\). Using these values in a similar calculation as above gives the estimated proportion of people in the population with diastolic blood pressures over 90 as 8.5%. Even with a sample of only 50 observations, the estimate is reasonably close to the true population proportion of about 8.1%.
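To make the “similar calculation” explicit, the 8.5% figure comes from the same \(Z\) score argument as before, with the sample estimates in place of \(\mu\) and \(\sigma\):
\[
Z = \frac{90 - 72.6}{12.7} = 1.37, \qquad P(Z > 1.37) \approx 0.085.
\]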
We have already encountered the normal distribution in Section 5.5.3, in the role of the sampling distribution of a test statistic rather than as a model for the population distribution of a variable. In fact, the most important use of the normal distribution is as a sampling distribution, because in this role it often cannot be replaced by any other simple distributions. The reasons for this claim are explained in this section. We begin with the case of the distribution of the sample mean in samples from a normal population, before extending it with a result which provides the justification for the standard normal sampling distributions used for inference on proportions in Chapter 5, and even for the \(\chi^{2}\) sampling distribution of the \(\chi^{2}\) test in Chapter 4.

Recall from Section 4.3.4 that the sampling distribution of a statistic is its distribution across all possible random samples of a given size from a population. The statistic we focus on here is the sample mean \(\bar{Y}\). If we assume that the population distribution is exactly normal, we have the following result: if \(Y\) follows a normal distribution with mean \(\mu\) and variance \(\sigma^{2}\) in the population, then the sampling distribution of the mean \(\bar{Y}\) of a random sample of size \(n\) is also a normal distribution, with mean \(\mu\) and variance \(\sigma^{2}/n\).

The mean and variance of this sampling distribution are worth discussing separately:

The mean of the sampling distribution of \(\bar{Y}\) is equal to the population mean \(\mu\) of \(Y\). This means that while \(\bar{Y}\) from a single sample may be below or above the true \(\mu\), in repeated samples it would on average estimate the correct parameter. In statistical language, \(\bar{Y}\) is then an unbiased estimate of \(\mu\). More generally, most possible samples would give values of \(\bar{Y}\) not very far from \(\mu\), where the scale for “far” is provided by the standard deviation discussed below.

The variance of the sampling distribution of \(\bar{Y}\) is \(\sigma^{2}/n\) or, equivalently, its standard deviation is \(\sigma/\sqrt{n}\). This standard deviation is also known as the standard error of the mean, and is often denoted by something like \(\sigma_{\bar{Y}}\). It describes the variability of the sampling distribution. Its magnitude depends on \(\sigma\), i.e. on the variability of \(Y\) in the population. More interestingly, it also depends on the sample size \(n\), which appears in the denominator in \(\sigma/\sqrt{n}\). This means that the standard error of the mean is smaller for large samples than for small ones. This is illustrated in Figure 6.7. It shows the sampling distribution of \(\bar{Y}\) for samples of sizes \(n=50\) and \(n=1000\) from a normal population with \(\mu=74.2\) and \(\sigma=11.3\), i.e. the population mean and standard deviation in the blood pressure example. It can be seen that while both sampling distributions are centered around the true mean \(\mu=74.2\), the distribution for the smaller sample is more spread out than that for the larger sample: more precisely, the standard error of the mean is \(\sigma/\sqrt{n}=11.3/\sqrt{50}=1.60\) when \(n=50\) and \(11.3/\sqrt{1000}=0.36\) when \(n=1000\). Recalling from Section 6.3.2 that approximately 95% of the probability in a normal distribution is within two standard deviations of the mean, this means that about 95% of samples of size 50 in this case would give a value of \(\bar{Y}\) between \(\mu-2\times 1.60=74.2-3.2=71.0\) and \(74.2+3.2=77.4\). For samples of size \(n=1000\), on the other hand, 95% of samples would yield \(\bar{Y}\) in the much narrower range of \(74.2-2\times 0.36=73.5\) to \(74.2+2\times 0.36=74.9\).

The connection between sample size and the variability of a sampling distribution applies not only to the sample mean but to (almost) all estimates of population parameters. In general, (i) the task of statistical inference is to use information in a sample to draw conclusions about population parameters; (ii) the expected magnitude of the sampling error, i.e. the remaining uncertainty about population parameters resulting from having information only on a sample, is characterised by the variability of the sampling distributions of estimates of the parameters; and (iii) other things being equal, the variability of a sampling distribution decreases when the sample size increases. Thus data really are the currency of statistics and more data are better than less data. In practice data collection of course costs time and money, so we cannot always obtain samples which are as large as we might otherwise want. Apart from resource constraints, the choice of sample size depends also on such things as the aims of the analysis, the level of precision required, and guesses about the variability of variables in the population. Statistical considerations of the trade-offs between them in order to make decisions about sample sizes are known as power calculations. They will be discussed very briefly later, in Section 7.6.2.

In Figure 6.8 we use a computer simulation rather than a mathematical theorem to examine the sampling distribution of a sample mean. Here 100,000 simple random samples of size \(n=50\) were drawn from the \(N=4489\) values of blood pressure that we are treating as the finite population in this illustration. The sample mean \(\bar{Y}\) of blood pressure was calculated for each of these samples, and the histogram of these 100,000 values of \(\bar{Y}\) is shown in Figure 6.8. Also shown is the curve of the normal distribution with the mean \(\mu\) and standard deviation \(\sigma/\sqrt{50}\) determined by the theoretical result given above.
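A simulation of this kind is easy to reproduce in any statistical software. The sketch below (Python with numpy, for illustration only) follows the same steps; since the HES blood pressure values are not reproduced here, it generates a stand-in finite population with roughly the same mean and standard deviation:

```python
import numpy as np

rng = np.random.default_rng(451)

# Stand-in for the finite population of 4489 blood pressure values
population = rng.normal(loc=74.2, scale=11.3, size=4489)

n_samples, n = 100_000, 50
means = np.empty(n_samples)
for i in range(n_samples):
    sample = rng.choice(population, size=n, replace=False)  # simple random sample
    means[i] = sample.mean()

# The simulated sampling distribution should be close to N(mu, sigma^2 / n),
# i.e. its standard deviation close to sigma / sqrt(n) = 11.3 / sqrt(50) = 1.60
print(means.mean(), means.std(ddof=0), 11.3 / np.sqrt(50))
```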
The match between the curve and the histogram in Figure 6.8 is clearly very close. This is actually a nontrivial finding which illustrates a result which is of crucial importance for statistical inference. Recall that the normal curve shown in Figure 6.8 is derived from the mathematical result stated above, which assumed that the population distribution of \(Y\) is exactly normal. The histogram in Figure 6.8, on the other hand, is based on repeated samples from the actual population distribution of blood pressure, which, while quite close to a normal distribution as shown in Figure 6.2, is certainly not exactly normal. Despite this, it is clear that the normal curve describes the histogram essentially exactly.

If this was not true, that is if the sampling distribution that applies for the normal distribution was inadequate when the true population distribution was even slightly different from normal, the result would be of little practical use. No population distribution is ever exactly normal, and many are very far from normality. Fortunately, however, it turns out that quite the opposite is true, and that the sampling distribution of the mean is approximately the same for nearly all population distributions. This is the conclusion from the Central Limit Theorem (CLT), one of the most remarkable results in all of mathematics. Establishing the CLT with increasing levels of generality has been the work of many mathematicians over several centuries, as different versions of it have been proved by, among others, de Moivre, Laplace, Cauchy, Chebyshev, Markov, Liapounov, Lindeberg, Feller, Lévy, Hoeffding, Robbins, and Rebolledo between about 1730 and 1980. One version of the CLT can be stated as

The (Lindeberg-Feller) Central Limit Theorem: For each \(n=1,2,\dots\), let \(Y_{nj}\), for \(j=1,2,\dots,n\), be independent random variables with \(\text{E}(Y_{nj})=0\) and \(\text{var}(Y_{nj})=\sigma^{2}_{nj}\). Let \(Z_{n}=\sum_{j=1}^{n} Y_{nj}\), and let \(B^{2}_{n}=\text{var}(Z_{n})=\sum_{j=1}^{n} \sigma^{2}_{nj}\). Suppose also that the following condition holds: for every \(\epsilon>0\),
\[\begin{equation}
\frac{1}{B_{n}^{2}}\,\sum_{j=1}^{n} \, \text{E}\{ Y_{nj}^{2} I(|Y_{nj}|\ge \epsilon B_{n})\}\rightarrow 0 \; \text{ as } \; n\rightarrow \infty.
\tag{6.9}
\end{equation}\]
Then \(Z_{n}/B_{n} \stackrel{\mathcal{L}}{\longrightarrow} N(0,1)\).

No, that will not come up in the examination. The theorem is given here just as a glimpse of how this topic would be introduced in a very different kind of text book,26 and because it pleases the author of this coursepack to note that Jarl Lindeberg was Finnish. For our purposes, it is better to state the same result in English: for a random sample of size \(n\) from practically any population distribution with mean \(\mu\) and variance \(\sigma^{2}\), the sampling distribution of the sample mean \(\bar{Y}\) is approximately a normal distribution with mean \(\mu\) and variance \(\sigma^{2}/n\), provided that \(n\) is reasonably large.

Thus the sampling distribution of the mean from practically any population distribution is approximately the same as when the population distribution is normal, as long as the sample size is “reasonably large”. The larger the sample size is, the closer the sampling distribution is to the normal distribution, and it becomes exactly normal when the sample size is infinitely large (i.e. “asymptotically”). What is large enough depends particularly on the nature of the population distribution. For continuous variables, the CLT approximation is typically adequate even for sample sizes as small as \(n=30\), so we can make use of the approximate normal sampling distribution when \(n\) is 30 or larger. This is, of course, simply a pragmatic rule of thumb which does not mean that the normal approximation is completely appropriate for \(n=30\) but entirely inappropriate for \(n=29\); rather, the approximation becomes better and better as the sample size increases, while below about 30 the chance of incorrect conclusions from using it becomes large enough for us not to usually want to take that risk.

We have seen in Figure 6.7 that in the blood pressure example the sampling distribution given by the Central Limit Theorem is essentially exact for samples of size \(n=50\). In this case this is hardly surprising, as the population distribution itself is already quite close to a normal distribution. The theorem is not, however, limited to such easy cases but works quite generally. To demonstrate this with a more severe test, let us consider a population distribution that is as far as possible from normal. This is the binomial distribution of a binary variable that was introduced in Section 5.3. If the probability parameter of this distribution is \(\pi\), its mean and variance are \(\mu=\pi\) and \(\sigma^{2}=\pi(1-\pi)\), and the sample mean \(\bar{Y}\) of observations from the distribution is the sample proportion \(\hat{\pi}\) (see the equation at the end of Section 5.4). The CLT then tells us that when the sample size \(n\) is large enough, the sampling distribution of the sample proportion \(\hat{\pi}\) is approximately a normal distribution with mean \(\pi\) and variance \(\pi(1-\pi)/n\).

This powerful result is illustrated in Figure 6.9. It is similar to Figure 6.8 in that it shows sampling distributions obtained from a computer simulation, together with the normal curve suggested by the CLT. For each plot, 5000 samples of size \(n\) were simulated from a population where \(\pi\) was 0.2. The sample proportion \(\hat{\pi}\) was then calculated for each simulated sample, and the histogram of these 5000 values drawn. Four different sample sizes were used: \(n=10\), 30, 100, and 1000. It can be seen that the normal distribution is not a very good approximation of the sampling distribution of \(\hat{\pi}\) when \(n\) is as small as 10 or even 30. For the larger values of 100 and 1000, however, the normal approximation is already quite good, as expected from the CLT.
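The simulation behind Figure 6.9 is simple enough to sketch in a few lines. The following Python snippet (for illustration only) mirrors the description above and compares each simulated sampling distribution with the CLT approximation \(N(\pi, \pi(1-\pi)/n)\):

```python
import numpy as np

rng = np.random.default_rng(451)

pi = 0.2           # population probability, as in Figure 6.9
n_samples = 5000   # number of simulated samples for each sample size

for n in (10, 30, 100, 1000):
    # Each simulated sample of size n gives one sample proportion pi-hat
    pi_hat = rng.binomial(n, pi, size=n_samples) / n
    # Standard deviation suggested by the CLT: sqrt(pi * (1 - pi) / n)
    se_clt = np.sqrt(pi * (1 - pi) / n)
    print(n, round(pi_hat.mean(), 4), round(pi_hat.std(ddof=0), 4), round(se_clt, 4))
```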
The variability of the sampling distribution will again depend on \(n\). In Figure 6.9, the observed range of values of \(\hat{\pi}\) decreases substantially as \(n\) increases. When \(n=10\), values of between about 0 and 0.4 are quite common, whereas with \(n=1000\), essentially all of the samples give \(\hat{\pi}\) between about 0.16 and 0.24, and a large majority are between 0.18 and 0.22. Thus increasing the sample size will again increase the precision with which we can estimate \(\pi\), and decrease the uncertainty in inference about its true value.

The Central Limit Theorem is, with some additional results, the justification for the standard normal sampling distribution used for tests and confidence intervals for proportions in Chapter 5. The conditions for sample sizes mentioned there (at the beginning of Sections 5.5.3 and 5.7) again derive from conditions for the CLT to be adequate. The same is also ultimately true for the \(\chi^{2}\) distribution and conditions for the \(\chi^{2}\) test in Chapter 4. Results like these, and many others, explain the central importance of the CLT in statistical methodology.

Carried out on behalf of The Department of Health by SCPR and the Department of Epidemiology and Public Health, UCL. Data used here were obtained from the UK Data Archive at http://www.data-archive.ac.uk.↩

Note that there we were looking for the probability of a Z score being “bigger than” rather than “at least” a certain value; for a continuous probability distribution this makes no difference, and both probabilities are the same.↩

Ferguson, T. S. (1996). A Course in Large Sample Theory, Chapman & Hall, London.↩

The CLT does not hold in some rather weird cases which need not concern us here. Condition (6.9) is a mathematical expression for “not weird”.↩
This chapter introduces some common descriptive statistical methods. It is organised around two dichotomies:

Methods that are used only for variables with small numbers of values, vs. methods that are used also or only for variables with many values (see the end of Section 1.2.2 for more on this distinction). The former include, in particular, descriptive methods for categorical variables, and the latter the methods for continuous variables.

Univariate descriptive methods which consider only one variable at a time, vs. bivariate methods which aim to describe the association between two variables.

Section 2.3 describes univariate methods for categorical variables and Section 2.4 bivariate methods for cases where both variables are categorical. Sections 2.5 and 2.6 cover univariate methods which are mostly used for continuous variables. Section 2.7 lists some bivariate methods where at least one variable is continuous; these methods are discussed in detail elsewhere in the coursepack. The chapter concludes with some general guidelines for presentation of descriptive tables and graphs in Section 2.8.
Two examples are used to illustrate the methods throughout this chapter:

Example: Country data

Consider data for 155 countries on three variables:

The region where the country is located, coded as 1=Africa, 2=Asia, 3=Europe, 4=Latin America, 5=Northern America, 6=Oceania.

A measure of the level of democracy in the country, measured on an 11-point scale from 0 (lowest level of democracy) to 10 (highest).

Gross Domestic Product (GDP) per capita, in thousands of U.S. dollars.

Further information on the variables is given in the appendix to this chapter (Section 2.9), together with the whole data set, shown in Table 2.14.

Region is clearly a discrete (and categorical), nominal-level variable, and GDP a continuous, interval-level variable. The democracy index is discrete; it is most realistic to consider its measurement level to be ordinal, and it is regarded as such in this chapter. However, it is the kind of variable which might in many analyses be treated instead as an effectively continuous, interval-level variable.
Example: Survey data on attitudes towards income redistribution

The data for the second example come from Round 5 of the European Social Survey (ESS), which was carried out in 2010.2 The survey was fielded in 28 countries, but here we use only data from 2344 respondents in the UK. Two variables are considered:

Sex of the respondent, coded as 1=Male, 2=Female.
Answer to the following survey question:
“The government should take measures to reduce differences in income levels”, with five response options coded as “Agree strongly”=1, “Agree”=2, “Neither agree nor disagree”=3, “Disagree”=4, and “Disagree strongly”=5. This is a measure of the respondent’s attitude towards income redistribution.
Both of these are discrete, categorical variables. Sex is binary and attitude is ordinal.

Attitudes towards income redistribution are an example of the broader topic of public opinion on welfare state policies. This is a large topic of classic and current interest in the social sciences, and questions on it have been included in many public opinion surveys.3 Of key interest is to explore how people’s attitudes are associated with their individual characteristics (including such factors as age, sex, education and income) and the contexts in which they live (for example the type of welfare regime adopted in their country). In Section 2.4 below we use descriptive statistics to examine such associations between sex and attitude in this sample.

The term distribution is very important in statistics. In this section we consider the distribution of a single variable in the observed data, i.e. its sample distribution: which values of the variable appear in the observed data, and how many times each of them occurs.

Later we will discuss other kinds of distributions, such as population, probability and sampling distributions, but they will all be variants of the same concept.

The task of descriptive statistics for a single variable is to summarize the sample distribution or some features of it. This can be done in the form of tables, graphs or single numbers.

When a variable has only a limited number of distinct values, its sample distribution can be summarized directly from the definition given above. In other words, we simply count and display the number of times each of the values appears in the data. One way to do the display is as a table, like the ones for region and the democracy index in the country data, and attitude in the survey example, which are shown in Tables 2.1, 2.2 and 2.3 respectively.
| Region           | Frequency | Proportion | %     |
|------------------|-----------|------------|-------|
| Africa           | 48        | 0.310      | 31.0  |
| Asia             | 44        | 0.284      | 28.4  |
| Europe           | 34        | 0.219      | 21.9  |
| Latin America    | 23        | 0.148      | 14.8  |
| Northern America | 2         | 0.013      | 1.3   |
| Oceania          | 4         | 0.026      | 2.6   |
| Total            | 155       | 1.000      | 100.0 |
| Democracy score | Frequency | Proportion | %    | Cumulative % |
|-----------------|-----------|------------|------|--------------|
| 0               | 35        | 0.226      | 22.6 | 22.6         |
| 1               | 12        | 0.077      | 7.7  | 30.3         |
| 2               | 4         | 0.026      | 2.6  | 32.9         |
| 3               | 6         | 0.039      | 3.9  | 36.8         |
| 4               | 5         | 0.032      | 3.2  | 40.0         |
| 5               | 5         | 0.032      | 3.2  | 43.2         |
| 6               | 12        | 0.077      | 7.7  | 50.9         |
| 7               | 13        | 0.084      | 8.4  | 59.3         |
| 8               | 16        | 0.103      | 10.3 | 69.6         |
| 9               | 15        | 0.097      | 9.7  | 79.3         |
| 10              | 32        | 0.206      | 20.6 | 99.9         |
| Total           | 155       | 0.999      | 99.9 |              |
| Response                       | Frequency | Proportion | %     | Cumulative % |
|--------------------------------|-----------|------------|-------|--------------|
| Agree strongly (1)             | 366       | 0.156      | 15.6  | 15.6         |
| Agree (2)                      | 1090      | 0.465      | 46.5  | 62.1         |
| Neither agree nor disagree (3) | 426       | 0.182      | 18.2  | 80.3         |
| Disagree (4)                   | 387       | 0.165      | 16.5  | 96.8         |
| Disagree strongly (5)          | 75        | 0.032      | 3.2   | 100.0        |
| Total                          | 2344      | 1.000      | 100.0 |              |
Each row of such a table corresponds to one possible value of a variable, and the second column shows the number of units with that value in the data. Thus there are 48 countries from Africa and 44 from Asia in the country data set, 32 countries with the highest democracy score of 10, and so on. Similarly, 366 respondents in the survey sample strongly agreed with the attitude question, and 75 strongly disagreed with it. These counts are also called frequencies, a distribution like this is a frequency distribution, and the table is also known as a frequency table. The sum of the frequencies, given on the line labelled “Total” in the tables, is the sample size \(n\), here 155 for the country data and 2344 for the survey data.

It is sometimes more convenient to consider relative values of the frequencies instead of the frequencies themselves. The relative frequency or proportion of a category of a variable is its frequency divided by the sample size. For example, the proportion of countries from Africa in the country data is \(48/155=0.310\) (rounded to three decimal places). A close relative of the proportion is the percentage, which is simply the proportion multiplied by a hundred; for example, 31% of the countries in the sample are from Africa. The sum of the proportions is one, and the sum of the percentages is one hundred (because of rounding error, the sum in a reported table may be very slightly different, as it is in Table 2.2).
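Frequency tables like 2.1–2.3 are easy to produce in most software. As an aside, a minimal sketch in Python with pandas (illustration only; the region labels below are a made-up stand-in for the country data):

```python
import pandas as pd

# Made-up stand-in for the region variable in the country data
region = pd.Series(["Africa", "Asia", "Europe", "Africa", "Latin America", "Asia"])

freq = region.value_counts()                # frequencies
prop = region.value_counts(normalize=True)  # proportions = frequency / n

table = pd.DataFrame({"Frequency": freq,
                      "Proportion": prop.round(3),
                      "%": (100 * prop).round(1)})
print(table)
print("Total n =", len(region))
```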
Graphical methods of describing data (statistical graphics) make use of our ability to process and interpret even very large amounts of visual information. The basic graph for summarising the sample distribution of a discrete variable is a bar chart. It is the graphical equivalent of a one-way table of frequencies.

Figures 2.1, 2.2 and 2.3 show the bar charts for region, democracy index and attitude, corresponding to the frequencies in Tables 2.1, 2.2 and 2.3. Each bar corresponds to one category of the variable, and the height of the bar is proportional to the frequency of observations in that category. This visual cue allows us to make quick comparisons between the frequencies of different categories by comparing the heights of the bars.

Some guidelines for drawing bar charts are:
The heights of the bars may represent frequencies, proportions or percentages. This only changes the units on the vertical axis but not the relative heights of the bars. The shape of the graph will be the same in each case. In Figure 2.1, the units are frequencies, while in Figures 2.2 and 2.3 they are percentages.

The bars do not touch each other, to highlight the discrete nature of the variable.

The bars must start at zero. If they do not, visual comparisons between their heights are distorted and the graph becomes useless.

If the variable is ordinal, the bars must be in the natural order of the categories, as in Figures 2.2 and 2.3. If the variable is nominal, the order is arbitrary. Often it makes sense to order the categories from largest (i.e. the one with the largest frequency) to the smallest, possibly leaving any “Others” category last. In Figure 2.1, the frequency ordering would swap Northern America and Oceania, but it seems more natural to keep Northern and Latin America next to each other.
A bar chart is a relatively unexciting statistical graphic in that it does not convey very much visual information. For nominal variables, in particular, the corresponding table is often just as easy to understand and takes less space. For ordinal variables, the bar chart has the additional advantage that its shape shows how the frequencies vary across the ordering of the categories. For example, Figure 2.2 quite effectively conveys the information that the most common values of the democracy index are the extreme scores 0 and 10.

Sometimes you may see graphs which look like bar charts of this kind, but which actually show the values of a single variable for some units rather than frequencies or percentages. For example, a report on the economies of East Asia might show a chart of GDP per capita for Japan, China, South Korea and North Korea, with one bar for each country, and their heights proportional to 28.2, 5.0, 17.8 and 1.3 respectively (c.f. the data in Table 2.14). The basic idea of such graphs is the same as that of standard bar charts. However, they are not particularly useful as descriptive statistics, since they simply display values in the original data without any summarization or simplification.

Instead of the whole sample distribution, we may want to summarise only some individual aspects of it, such as its central tendency or variation. Descriptive statistics that are used for this purpose are broadly similar for both discrete and continuous variables, so they will be discussed together for both in Section 2.6.

The next task we consider is how to describe the sample distributions of two categorical variables together, and in so doing also summarise the association between these variables. The key tool is a table which shows the crosstabulation of the frequencies of the variables. This is also known as a contingency table. Table 2.4 shows such a table for the respondents’ sex and attitude in our survey example. We use it to introduce the basic structure and terminology of contingency tables:
| Sex    | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total |
|--------|----------------|-------|----------------------------|----------|-------------------|-------|
| Male   | 160            | 439   | 187                        | 200      | 41                | 1027  |
| Female | 206            | 651   | 239                        | 187      | 34                | 1317  |
| Total  | 366            | 1090  | 426                        | 387      | 75                | 2344  |
Because a table like Table 2.4 summarizes the values of two variables, it is known as a two-way contingency table. Similarly, the tables of single variables introduced in Section 2.3.2 are one-way tables. It is also possible to construct tables involving more than two variables, i.e. three-way tables, four-way tables, and so on. These are discussed in Chapter 9.

The variables in a contingency table may be ordinal or nominal (including dichotomous). Often an ordinal variable is derived by grouping an originally continuous, interval-level variable, a practice which is discussed further in Section 2.5.

The horizontal divisions of a table (e.g. the lines corresponding to the two sexes in Table 2.4) are its rows, and the vertical divisions (e.g. the survey responses in Table 2.4) are its columns.

The size of a contingency table is stated in terms of the numbers of its rows and columns. For example, Table 2.4 is a \(2\times 5\) (pronounced “two-by-five”) table, because it has two rows and five columns. This notation may also be used symbolically, so that we may refer generically to \(R\times C\) tables which have some (unspecified) number of \(R\) rows and \(C\) columns. The smallest two-way table is thus a \(2\times 2\) table, where both variables are dichotomous.

The intersection of a row and a column is a cell of the table. The basic two-way contingency table shows in each cell the number (frequency) of units in the data set with the corresponding values of the row variable and the column variable. For example, Table 2.4 shows that there were 160 male respondents who strongly agreed with the statement, and 239 female respondents who neither agreed nor disagreed with it. These frequencies are also known as cell counts.

The row and column labelled “Total” in Table 2.4 are known as the margins of the table. They show the frequencies of the values of the row and the column variable separately, summing the frequencies over the categories of the other variable. For example, the table shows that there were overall 1027 (\(=160+439+187+200+41\)) male respondents, and that overall 75 (\(=41+34\)) respondents strongly disagreed with the statement. In other words, the margins are one-way tables of the frequencies of each of the two variables, so for example the frequencies on the margin for attitude in Table 2.4 are the same as the ones in the one-way table for this variable shown in Table 2.3. The distributions described by the margins are known as the marginal distributions of the row and column variables. In contrast, the frequencies in the internal cells of the table, which show how many units have each possible combination of the row and column variables, describe the joint distribution of the two variables.

The number in the bottom right-hand corner of the table is the sum of all of the frequencies, i.e. the total sample size \(n\).

In addition to frequencies, it is often convenient to display proportions or percentages. Dividing the frequencies by the sample size gives overall proportions and (multiplying by a hundred) percentages. This is illustrated in Table 2.5, which shows the overall proportions, obtained by dividing the frequencies in Table 2.4 by \(n=2344\). For example, out of all these respondents, the proportion of 0.102 (\(=239/2344\)) were women who neither agreed nor disagreed with the statement. The proportions are also shown for the marginal distributions: for example, 15.6% (i.e. the proportion \(0.156=366/2344\)) of the respondents strongly agreed with the statement. The sum of the proportions over all the cells is 1, as shown in the bottom right corner of the table.
| Sex    | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total |
|--------|----------------|-------|----------------------------|----------|-------------------|-------|
| Male   | 0.068          | 0.187 | 0.080                      | 0.085    | 0.017             | 0.438 |
| Female | 0.088          | 0.278 | 0.102                      | 0.080    | 0.015             | 0.562 |
| Total  | 0.156          | 0.465 | 0.182                      | 0.165    | 0.032             | 1.000 |

: (#tab:t-sex-attitude-pr) “The government should take measures to reduce differences in income levels”: Two-way table of joint proportions of respondents in the survey example, with each combination of sex and attitude towards income redistribution. Data: European Social Survey, Round 5, 2010, UK respondents only.
A two-way contingency table is symmetric in that it does not distinguish between explanatory and response variables. In many applications, however, this distinction is useful for interpretation. In our example, for instance, it is natural to treat sex as the explanatory variable and attitude towards income redistribution as the response, and so to focus the interpretation on how attitude may depend on sex.

The overall proportions are in such cases not the most relevant quantities for interpretation of a table. Instead, we typically calculate proportions within each category of the row variable or the column variable, i.e. the conditional proportions of one variable given the other. The numbers in brackets in Table 2.6 show these proportions calculated for each row of Table 2.4 (Table 2.6 also includes the actual frequencies; it is advisable to include them even when conditional proportions are of most interest, to show the numbers on which the proportions are based). In other words, these are the conditional proportions of attitude towards income redistribution given sex, i.e. separately for men and women. For example, the number 0.156 in the top left-hand corner of Table 2.6 is obtained by dividing the number of male respondents who agreed strongly with the statement (160) by the total number of male respondents (1027). Thus 15.6% of the men strongly agreed, and for example 2.6% of women strongly disagreed with the statement. The (1.0) in the last column of the table indicates that the proportions sum to 1 along each row, to remind us that the conditional proportions have been calculated within the rows. The bracketed proportions in the ‘Total’ row are the proportions of the marginal distribution of the attitude variable, so they are the same as the proportions in the ‘Total’ row of Table 2.5.
| Sex    | Agree strongly | Agree   | Neither agree nor disagree | Disagree | Disagree strongly | Total |
|--------|----------------|---------|----------------------------|----------|-------------------|-------|
| Male   | 160            | 439     | 187                        | 200      | 41                | 1027  |
|        | (0.156)        | (0.428) | (0.182)                    | (0.195)  | (0.040)           | (1.0) |
| Female | 206            | 651     | 239                        | 187      | 34                | 1317  |
|        | (0.156)        | (0.494) | (0.182)                    | (0.142)  | (0.026)           | (1.0) |
| Total  | 366            | 1090    | 426                        | 387      | 75                | 2344  |
|        | (0.156)        | (0.465) | (0.182)                    | (0.165)  | (0.032)           | (1.0) |
We could also have calculated conditional proportions within the columns, i.e. for sex given attitude. For example, the proportion \(0.563=206/366\) of all respondents who strongly agreed with the statement are women. These, however, seem less interesting, because it seems more natural to examine how attitude varies by sex rather than how sex varies by attitude. In general, for any two-way table we can calculate conditional proportions for both the rows and the columns, but typically only one of them is used for interpretation.
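These calculations are easy to reproduce with general-purpose software. The following short sketch (in Python with NumPy, which is not the package used on this course, so it is offered only as an illustration; the variable names are ours) computes both sets of conditional proportions from the frequencies in Table 2.6.

```python
import numpy as np

# Frequencies from Table 2.6: rows are Male and Female, columns are the five
# attitude categories from "Agree strongly" to "Disagree strongly".
counts = np.array([[160, 439, 187, 200, 41],
                   [206, 651, 239, 187, 34]])

row_totals = counts.sum(axis=1, keepdims=True)   # 1027 and 1317
print((counts / row_totals).round(3))            # conditional on sex (within rows)

col_totals = counts.sum(axis=0)                  # 366, 1090, 426, 387, 75
print((counts / col_totals).round(3))            # conditional on attitude (within columns)
```

The row proportions reproduce (up to rounding) the bracketed values in Table 2.6, and the female entry in the first column of the column proportions is the value \(0.563=206/366\) mentioned above.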
Suppose that we regard one variable in a two-way table as the explanatory variable (let us denote it by \(X\)) and the other variable as the response variable (\(Y\)). In our survey example, sex is thus \(X\) and attitude is \(Y\). Here the dichotomous \(X\) divides the full sample into two groups, identified by the observed value of \(X\) — men and women. We may then think of these two groups as two separate samples, and consider statistical quantities separately for each of them. In particular, in Table 2.6 we calculated conditional proportions for \(Y\) given \(X\), i.e. for attitude given sex. These proportions describe two distinct sample distributions of \(Y\), one for men and one for women. They are examples of conditional distributions: the conditional distribution of \(Y\) given \(X\) is the distribution of \(Y\) among those units which have a particular value of \(X\).
This concept is not limited to two-way tables but extends also to other kinds of variables and distributions that are discussed later in this coursepack. Both the response variable \(Y\) and the explanatory variable \(X\) may be continuous as well as discrete, and can have any number of values. In all such cases there is a separate conditional distribution for \(Y\) for each possible value of \(X\). A particular one of these distributions is sometimes referred to more explicitly as the conditional distribution of \(Y\) given \(X=x\), where the “\(X=x\)” indicates that \(X\) is considered at a particular value \(x\) (as in “the distribution of \(Y\) given \(X=2\)”, say).
Conditional distributions of one variable given another allow us to define and describe associations between the variables. The informal definition in Section 1.2.4 stated that there is an association between two variables if knowing the value of one of them will help to predict the value of the other. We can now give a more precise definition: two variables are associated if the conditional distribution of one of them given the other is different at different values of the other variable.
This definition coincides with the more informal one. If the conditional distribution of \(Y\) varies with \(X\) and if we know \(X\), it is best to predict \(Y\) from its conditional distribution given the known value of \(X\). This will indeed work better than predicting \(Y\) without using information on \(X\), i.e. from the marginal distribution of \(Y\). Prediction based on the conditional distribution would still be subject to error, because in most cases \(X\) does not predict \(Y\) perfectly. In other words, the definition of an association considered here is statistical (or probabilistic) rather than deterministic. In our example a deterministic association would mean that there is one response given by all the men and one response (possibly different from the men’s) given by all the women. This is of course not the case here, nor in most other applications in the social sciences. It is thus crucially important that we also have tools for analysing statistical associations.
In our example, sex and attitude are associated if men and women differ in their attitudes toward income redistribution. Previous studies suggest that such an association exists, and that it takes the form that women tend to have higher levels of support than men for redistribution.4 As possible explanations for this pattern, both structural reasons (women tend to have lower incomes than men and to rely more on welfare state support) and cultural or psychological ones (women are more likely than men to adopt social values of equality and caring) have been suggested.
Two variables presented in a contingency table are associated in the sample if the conditional distributions of one of them vary across the values of the other. This is the case in our data set: for example, 4.0% of men but 2.6% of women strongly disagree with the statement. There is thus some association between sex and attitude in this sample. This much is easy to conclude. What requires a little more work is a more detailed description of the pattern and strength of the association, i.e. how and where the conditional distributions differ from each other.
The most general way of summarising associations in a contingency table is by comparing the conditional proportions of the same level of the response given different levels of the explanatory variable. There is no simple formula for how this should be done, so you should use your common sense to present comparisons which give a good summary of the patterns across the table. Unless both variables in the table are dichotomous, several different comparisons may be needed, and may not all display similar patterns. For example, in Table 2.6 the same proportion (0.156, or 15.6%) of both men and women strongly agree with the statement, whereas the proportion who respond “Agree” is higher for women (49.4%) than for men (42.8%).
When the response variable is ordinal, it is often more illuminating to focus on comparisons of cumulative proportions which add up conditional proportions over two or more adjacent categories. For instance, the combined proportion of respondents who either strongly agree or agree with the statement is a useful summary of the general level of agreement among the respondents. In our example this is 58.4% (\(=15.6\%+42.8\%\)) for men but 65.0% for women.
A comparison between two proportions may be further distilled into a single number by reporting the difference or ratio between them. For example, for the proportions of agreeing or strongly agreeing above, the difference is \(0.650-0.584=0.066\), so the proportion is 0.066 (i.e. 6.6 percentage points) higher for women than for men. The ratio of these proportions is \(0.650/0.584=1.11\), so the proportion for women is 1.11 times the proportion for men (i.e. 11% higher). Both of these indicate that in this sample women were more likely to agree or strongly agree with the statement than were men. In a particular application we might report a difference or a ratio like this, depending on which of them was considered more relevant or easily understandable. Other summaries are also possible; for example, on MY452 we will discuss a measure called the odds ratio, which turns out to be convenient for more general methods of analysing associations involving categorical variables.
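Continuing the illustrative Python sketch above (again only an aside, not part of the course software), these summaries are one-line calculations; the proportions are taken from Table 2.6.

```python
# Proportions who agree or strongly agree, by sex (from Table 2.6)
men = 0.156 + 0.428       # cumulative proportion for men: 0.584
women = 0.156 + 0.494     # cumulative proportion for women: 0.650

print(round(women - men, 3))   # difference: 0.066 (6.6 percentage points)
print(round(women / men, 2))   # ratio: 1.11
```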
The broad conclusion in the example is that there is an association between sex and attitude in these data from the European Social Survey, and that it is of the kind suggested by existing literature. A larger proportion of women than of men indicate agreement with the statement that the government should take measures to reduce income differences, and conversely a larger proportion of men disagree with it (e.g. 23.5% of men but only 16.8% of women disagree or strongly disagree). Thus in this sample women do indeed demonstrate somewhat higher levels of support for income redistribution. Whether these differences also warrant a generalisation of the conclusions to people outside the sample is a question which we will take up in Chapters 3 and 4.
In the previous example the explanatory variable (sex) had 2 categories and the response variable (attitude) had 5. A full examination of the individual conditional distributions of attitude given sex then involved comparisons of five pairs of proportions, one for each level of the attitude variable. This number gets larger still if the explanatory variable also has several levels, as in the following example:
Example: Importance of short-term gains for investors
Information on the behaviour and expectations of individual investors was collected by sending a questionnaire to a sample of customers of a U.S. brokerage house.5 One of the questions asked the respondents to state how much importance they placed on quick profits (short-term gains) as an objective when they invested money. The responses were recorded in four categories as “Irrelevant”, “Slightly important”, “Important” or “Very important”. Table 2.7 shows the crosstabulation of this variable with the age of the respondent in four age groups.
Age group | Irrelevant | Slightly important | Important | Very important | Total
---|---|---|---|---|---
Under 45 | 37 | 45 | 38 | 26 | 146
 | (0.253) | (0.308) | (0.260) | (0.178) | (1.00)
45–54 | 111 | 77 | 57 | 37 | 282
 | (0.394) | (0.273) | (0.202) | (0.131) | (1.00)
55–64 | 153 | 49 | 31 | 20 | 253
 | (0.605) | (0.194) | (0.123) | (0.079) | (1.00)
65 and over | 193 | 64 | 19 | 15 | 291
 | (0.663) | (0.220) | (0.065) | (0.052) | (1.00)
Total | 494 | 235 | 145 | 98 | 972
Here there are four conditional distributions, one for each age group, and each of them is described by four proportions of different levels of attitude. There are then many possible comparisons of the kind discussed above. For example, we might want to compare the proportions of respondents who consider short-term gains irrelevant between the oldest and the youngest age group, the proportions for whom such gains are very important between these two groups, or, in general, the proportions in any category of the response variable between any two age groups.
Although pairwise comparisons like this are important and informative, they can clearly become cumbersome when the number of possible comparisons is large. A potentially attractive alternative is then to try to summarise the strength of the association between the variables in a single number, a measure of association of some kind. There are many such measures for two-way contingency tables, labelled with a range of Greek and Roman letters (e.g. \(\phi\), \(\lambda\), \(\gamma\), \(\rho\), \(\tau\), V, Q, U and d). The most useful of them are designed for tables where both of the variables are measured at the ordinal level, as is the case in Table 2.7. The ordering of the categories can then be exploited to capture the strength of the association in a single measure. This is not possible when at least one of the variables is measured at the nominal level, as any attempt to reduce the patterns of the conditional probabilities into one number will then inevitably obscure much of the information in the table. It is better to avoid measures of association defined for nominal variables, and to describe their associations only through comparisons of conditional probabilities as described in the previous section.
Here we will discuss only one measure of association for two-way tables of ordinal variables. It is known as \(\gamma\) (“gamma”). It characterises one possible general pattern of association between two ordinal variables, namely the extent to which high values of one variable tend to be associated with high or low values of the other variable. Here speaking of “low” and “high” values, or of “increasing” or “decreasing” them, is meaningful when the variables are ordinal. For example, in Table 2.7 the categories corresponding to the bottom rows and right-most columns are in an obvious sense “high” values of age and importance respectively.
Consider the conditional proportions of importance given age group shown in Table 2.7. It is clear that, for example, the proportion of respondents for whom short-term gains are very important is highest in the youngest, and lowest in the oldest age group. Similarly, the proportion of respondents for whom such gains are irrelevant increases consistently from the youngest to the oldest group. In other words, respondents with high values of the explanatory variable (age group) tend to have low values of the response variable (importance of short-term gains). Such an association is said to be negative. A positive association would be seen in a table where high values of one variable were associated with high values of the other.
Measures of association for summarising such patterns are typically based on the numbers of concordant and discordant pairs of observations. Suppose we compare two units classified according to the two variables in the table. These units form a concordant pair if one of them has a higher value of both variables than the other. For example, consider two respondents in Table 2.7, one with values (Under 45; Irrelevant) and the other with (45–54; Important). This is a concordant pair, because the second respondent has both a higher value of age group (45–54 vs. Under 45) and a higher value of the importance variable (Important vs. Irrelevant) than the first respondent. In contrast, in a discordant pair one unit has a higher value of one variable but a lower value of the other variable than the other unit. For example, a pair of respondents with values (45–54; Very important) and (55–64; Irrelevant) is discordant, because the latter has a higher value of age group but a lower value of the importance variable than the former. Pairs of units with the same value of one or both of the variables are known as tied pairs. They are not used in the calculations discussed below.
The \(\gamma\) measure of association is defined as \[\gamma=\frac{C-D}{C+D}\] where \(C\) is the total number of concordant pairs in the table, and \(D\) is the number of discordant pairs. For Table 2.7, the value of this is \(\gamma=-0.377\).
Calculation of \(C\) and \(D\) is straightforward but tedious and uninteresting, and can be left to a computer. Remembering the exact form of the formula for \(\gamma\) is also not crucial. More important than the formula of \(\gamma\) (or any other measure of association) is its interpretation. This can be considered on several levels of specificity, which are discussed separately below. The discussion is relatively detailed, as these considerations are relevant and useful not only for \(\gamma\), but also for all other measures of association in statistics.
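For readers who want to see the counting spelled out, the following Python sketch (an illustration only; this is not the software used on the course, and the function name is ours) counts the concordant and discordant pairs directly from a table of frequencies and reproduces \(\gamma=-0.377\) for Table 2.7.

```python
import numpy as np

def gamma_statistic(table):
    """Goodman-Kruskal gamma for a two-way table of counts whose rows and
    columns are both listed in increasing (ordinal) order."""
    t = np.asarray(table)
    concordant = discordant = 0
    n_rows, n_cols = t.shape
    for i in range(n_rows):
        for j in range(n_cols):
            # cells below and to the right pair concordantly with cell (i, j)
            concordant += t[i, j] * t[i + 1:, j + 1:].sum()
            # cells below and to the left pair discordantly with cell (i, j)
            discordant += t[i, j] * t[i + 1:, :j].sum()
    return (concordant - discordant) / (concordant + discordant)

# Frequencies from Table 2.7: age group (rows) by importance of short-term gains
counts = [[ 37, 45, 38, 26],
          [111, 77, 57, 37],
          [153, 49, 31, 20],
          [193, 64, 19, 15]]
print(round(gamma_statistic(counts), 3))   # -0.377
```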
The sign of the statistic: It can be seen from the definition of \(\gamma\) that it is positive (greater than zero) when there are more concordant pairs than discordant ones (i.e. \(C>D\)), and negative when there are more discordant than concordant pairs (\(C<D\)). This also implies that \(\gamma\) will be positive when the association is positive in the sense discussed above, and negative when the association is negative. A value of \(\gamma=0\) indicates a complete lack of association of this kind. In Table 2.7 we have \(\gamma=-0.377\), indicating a negative association. This agrees with the conclusion obtained informally above.
The extreme values of the statistic: Clearly \(\gamma=1\) if there are no discordant pairs (\(D=0\)), and \(\gamma=-1\) if there are no concordant pairs (\(C=0\)). The values \(\gamma=-1\) and \(\gamma=1\) are the smallest and largest possible values of \(\gamma\), and indicate the strongest possible levels of negative and positive association respectively. More generally, the closer \(\gamma\) is to \(-1\) or 1, the stronger is the (negative or positive) association.
The formal interpretation of the statistic: This refers to any way of interpreting the value more understandably than just vaguely as a measure of “strength of association”. Most often, such an interpretation is expressed as a proportion of some kind. For \(\gamma\), this is done using a principle known as Proportional reduction of error (PRE). Because the PRE idea is also used to interpret many other measures of association in statistics, we will first describe it in general terms which are not limited to \(\gamma\).
Suppose we consider an explanatory variable \(X\) and a response variable \(Y\), and want to make predictions of the values of \(Y\) in a data set. This is done twice, first in a way which makes no use of \(X\), and then in a way which predicts the value of \(Y\) for each unit using information on the corresponding value of \(X\) and on the strength and direction of the association between \(X\) and \(Y\). Recalling the connection between association and prediction, it is clear that the second approach should result in better predictions if the two variables are associated. The comparison also reflects the strength of the association: the stronger it is, the bigger is the improvement in prediction gained by utilising information on \(X\).
A PRE measure describes the size of this improvement. Suppose that the magnitude or number of errors made in predicting the values of \(Y\) in a data set using the first scheme, i.e. ignoring information on \(X\), is somehow measured by a single number \(E_{1}\), and that \(E_{2}\) is the same measure of errors for the second prediction scheme which makes use of \(X\). The difference \(E_{1}-E_{2}\) is thus the improvement in prediction achieved by the second scheme over the first. A PRE measure of association is the ratio \[\text{PRE}= \frac{E_{1}-E_{2}}{E_{1}},\] i.e. the improvement in predictions as a proportion of the number of errors \(E_{1}\) under the first scheme. This formulation is convenient for interpretation, because a proportion is easily understandable even if \(E_{1}\) and \(E_{2}\) themselves are expressed in some unfamiliar units. The smallest possible value of the PRE measure is clearly 0, obtained when \(E_{2}=E_{1}\), i.e. when using information on \(X\) gives no improvement in predictions. The largest possible value of PRE is 1, obtained when \(E_{2}=0\), i.e. when \(Y\) can be predicted perfectly from \(X\). The values 0 and 1 indicate no association and perfect association respectively.
The \(\gamma\) statistic is a PRE measure, although with a somewhat convoluted explanation. Suppose that we consider a pair of observations which is known to be either concordant or discordant (the PRE interpretation of \(\gamma\) ignores tied observations). One of the two observations thus has a higher value of \(X\) than the other. For example, suppose that we consider two respondents in Table 2.7 from different age groups. We are then asked to predict the order of the values of \(Y\), i.e. which of the two units has the higher value of \(Y\). In the example of Table 2.7, this means predicting whether the older respondent places a higher or lower level of importance on short-term gains than the younger respondent. Two sets of predictions are again compared. The first approach makes the prediction at random and with equal probabilities, essentially tossing a coin to guess whether the observation with the higher value of \(X\) has the higher or lower value of \(Y\). The second prediction makes use of information on the direction of the association between \(X\) and \(Y\). If the association is known to be negative (i.e. there are more discordant than concordant pairs), every pair is predicted to be discordant; if it is positive, every pair is predicted to be concordant. For example, in Table 2.7 the association is negative, so we would always predict that the older of two respondents places a lower value of importance on short-term gains.
If these predictions are repeated for every non-tied pair in the table, the expected number of incorrect predictions under the first scheme is \(E_{1}=(C+D)/2\). Under the second scheme it is \(E_{2}=D\) if the association is positive and \(E_{2}=C\) if it is negative. Substituting these into the general PRE formula shows that the \(\gamma\) statistic is of the PRE form when \(\gamma\) is positive; when it is negative, the absolute value of \(\gamma\) (i.e. its value with the minus sign omitted) is a PRE measure, and the negative sign of \(\gamma\) indicates that the association is in the negative direction. In our example \(\gamma=-0.377\), so age and attitude are negatively associated. Its absolute value \(0.377\) shows that we will make 37.7% fewer errors if we predict for every non-tied pair that the older respondent places less importance on short-term gains, compared to predictions made by tossing a coin for each pair.
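To make the substitution explicit for the positive-association case (the negative case is analogous, with \(E_{2}=C\) and the roles of \(C\) and \(D\) exchanged):
\[\text{PRE}=\frac{E_{1}-E_{2}}{E_{1}}
=\frac{(C+D)/2-D}{(C+D)/2}
=\frac{C-D}{C+D}=\gamma.\]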
The final property of interest is the substantive interpretation of the strength of association indicated by \(\gamma\) for a particular table. For example, should \(\gamma=-0.377\) for Table 2.7 be regarded as evidence of weak, moderate or strong negative association between age and attitude? Although this is usually the most (or only) interesting part of the interpretation, it is also the most difficult, and one to which a statistician’s response is likely to be a firm “it depends”. This is because the strength of associations we may expect to observe depends on the variables under consideration: a \(\gamma\) of 0.5, say, might be commonplace for some types of variables but never observed for others. Considerations of the magnitude of \(\gamma\) are most useful in comparisons of associations between the same two variables in different samples or groups. For example, in Chapter 9 we will calculate \(\gamma\) for the variables in Table 2.7 separately for men and women (see Table 9.4). These turn out to be very similar, so the strength of the association appears to be roughly similar in these two groups.
Three further observations complete our discussion of \(\gamma\):
- Since “high” values of a variable were defined as ones towards the bottom and right of a table, reversing the order in which the categories are listed will also reverse the interpretation of “high” and “low” and of a “negative” or “positive” association. Such a reversal for one variable will change the sign of \(\gamma\) but not its absolute value. For example, in Table 2.7 we could have listed the age groups from the oldest to the youngest, in which case we would have obtained \(\gamma=0.377\) instead of \(\gamma=-0.377\). Reversing the ordering of both of the variables will give the same value of \(\gamma\) as when neither is reversed. The nature and interpretation of the association remain unchanged in each case.

- \(\gamma\) can also be used when one or both of the variables are dichotomous, but not when either is nominal and has more than two categories. If, for example, the table includes a nominal variable with four categories, there are 24 different and equally acceptable ways of ordering the categories, each giving a different value of \(\gamma\) (or rather 12 different positive values and their negatives). An interpretation of the value obtained for any particular ordering is then entirely meaningless.

- \(\gamma\) can also be treated as an estimate of the corresponding measure of association in a population from which the observed table is a sample. To emphasise this, the symbol \(\hat{\gamma}\) is sometimes used for the sample statistic we have discussed here, reserving \(\gamma\) for the population parameter. It is then also possible to define significance tests and confidence intervals for the population \(\gamma\). These are given, for example, in SPSS output for two-way tables. Here, however, we will not discuss them, but will treat \(\gamma\) purely as a descriptive measure of association. Statistical inference on associations for two-way tables will be considered only in the context of a different test, introduced in Chapter 4.
A table of frequencies and proportions or percentages is a concise and easily understandable summary of the sample distribution of a categorical variable or any variable for which only a small number of different values have been observed. On the other hand, applying the same idea to a continuous variable or a discrete variable with many different values is likely to be less useful, because all of the individual frequencies may be small. For example, in this section we illustrate the methods using the GDP variable in the country data introduced at the beginning of Section 2.2. This has 99 different values among the 155 countries, 66 of these values appear only once, and the largest frequency (for 0.8) is five. A frequency table of these values would be entirely unenlightening.
GDP (thousands of dollars) | Frequency | %
---|---|---
less than 2.0 | 49 | 31.6
2.0–4.9 | 32 | 20.6
5.0–9.9 | 29 | 18.7
10.0–19.9 | 21 | 13.5
20.0–29.9 | 19 | 12.3
30.0 or more | 5 | 3.2
Total | 155 | 99.9

: (#tab:t-gdp)Frequency distribution of GDP per capita in the country data.
Instead, we can count the frequencies for some intervals of values. Table 2.8 shows an example of this for the GDP variable. The frequency on its first line shows that there are 49 countries with GDP per capita of less than $2000, the second line that there are 32 countries with the GDP per capita between $2000 and $4900 (these values included), and so on. We have thus in effect first created an ordinal categorical variable by grouping the original continuous GDP variable, and then drawn a frequency table of the grouped variable in the same way as we do for categorical variables. Some information about the distribution of the original, ungrouped variable will be lost in doing so, in that the exact values of the observations within each interval are obscured. This, however, is a minor loss compared to the benefit of obtaining a useful summary of the main features of the distribution.
The intervals must be mutually exclusive, so that no value belongs to more than one interval, and exhaustive, so that all values in the data belong to some interval. Otherwise the choice is arbitrary, in that we can choose the intervals in any way which is sensible and informative. Often this is a question of finding the right balance between too few categories (losing too much of the original information) and too many categories (making the table harder to read).
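In practice the grouping is done by software. As an aside (in Python with pandas rather than the package used on this course, and with a small made-up set of GDP figures rather than the real 155-country data), a grouped frequency table with the intervals of Table 2.8 could be produced along the following lines.

```python
import pandas as pd

# A few made-up GDP per capita figures, in thousands of dollars
gdp = pd.Series([0.5, 0.8, 1.9, 2.3, 4.7, 5.6, 9.0, 11.2, 19.0, 23.7, 31.1, 37.8])

# Interval boundaries mirroring Table 2.8; right=False means each interval
# includes its lower endpoint, so [2.0, 5.0) corresponds to 2.0-4.9
bins = [0, 2.0, 5.0, 10.0, 20.0, 30.0, float("inf")]
labels = ["less than 2.0", "2.0-4.9", "5.0-9.9", "10.0-19.9", "20.0-29.9", "30.0 or more"]
grouped = pd.cut(gdp, bins=bins, labels=labels, right=False)

freq = grouped.value_counts(sort=False)              # frequencies in interval order
print(pd.DataFrame({"Frequency": freq, "%": (100 * freq / len(gdp)).round(1)}))
```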
A histogram is the graphical version of a frequency table for a grouped variable, like that in Table 2.8. Figure 2.4 shows a histogram for the GDP variable (the histogram consists of the bars; the lines belong to a different graph, the frequency polygon explained below). The basic idea of a histogram is very similar to that of the bar chart, except that now the bars touch each other to emphasise the fact that the original (ungrouped) variable is considered continuous. Because the grouped variable is ordinal, the bars of a histogram must be in the correct order.
A good choice of the grouping intervals of the variable, and thus of the number of bars in the histogram, is important for the usefulness of the graph. If there are too few bars, too much information is obscured; if too many, the shape of the histogram may become confusingly irregular. Often the number of intervals used for a histogram will be larger than what would be sensible for a table like Table 2.8. Furthermore, intervals like those in Table 2.8 are not even allowed in a histogram, because they are of different widths (of 2, 3, 5, 10 and 10 units for the first five, and unbounded for the last one). The intervals in a histogram must be of equal widths, because otherwise the visual information in it becomes distorted (at least unless the histogram is modified in ways not discussed here). For example, the intervals in Figure 2.4 (less than 2.5, 2.5–less than 5.0, 5.0–less than 7.5 etc.) are all 2.5 units wide. The exact choice can usually be left to computer packages such as SPSS which use automatic rules for choosing sensible intervals.
Figure 2.4 also shows a frequency polygon of the GDP variable. This is obtained by drawing lines to connect the mid-points of the tops of the bars in a histogram. At each end of the histogram the lines are further connected to zero, as if the histogram had additional bars of zero height to the left and right of the smallest and largest observed categories. The result is a curve with a similar shape as the corresponding histogram, and its interpretation is similar to that of the histogram.
A histogram is usually preferable to a frequency polygon for presenting a single distribution, especially since histograms are typically much easier to produce in standard software such as SPSS. However, frequency polygons will later be useful for making comparisons between several distributions.
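For readers working outside SPSS, the following matplotlib sketch (an aside under the same caveats as before: Python rather than the course software, and made-up data) draws a histogram with equal 2.5-unit intervals and overlays the corresponding frequency polygon.

```python
import numpy as np
import matplotlib.pyplot as plt

gdp = np.array([0.5, 0.8, 1.9, 2.3, 4.7, 5.6, 9.0, 11.2, 19.0, 23.7, 31.1, 37.8])

# Histogram with equal-width intervals of 2.5 (as in Figure 2.4)
edges = np.arange(0, 40 + 2.5, 2.5)
counts, edges, _ = plt.hist(gdp, bins=edges, edgecolor="black")

# Frequency polygon: join the midpoints of the bar tops, anchored at zero
# one interval beyond each end of the histogram
midpoints = (edges[:-1] + edges[1:]) / 2
x = np.concatenate(([midpoints[0] - 2.5], midpoints, [midpoints[-1] + 2.5]))
y = np.concatenate(([0], counts, [0]))
plt.plot(x, y, marker="o")

plt.xlabel("GDP per capita (thousands of dollars)")
plt.ylabel("Frequency")
plt.show()
```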
A stem and leaf plot is a close relative of the histogram, and is used for much the same purposes, mostly in small data sets. It is easiest to explain through an example, so let us consider the GDP variable again. The stem and leaf plot for it is shown in Figure 2.9. First, note that the values of the variable in the sample (from $500 to $37800, recorded as 0.5 to 37.8 thousands of dollars) have at most three significant digits. If the observations have too many digits to be convenient for a stem and leaf plot, they can be rounded first; for example, if the GDP figures had actually been recorded down to the last dollar, we would have rounded them to the nearest hundred dollars (as in Table 2.14) for the plot. The last digit (here hundreds of dollars) will determine the leaves for the plot, while other digits (here round thousands of dollars) will define the stem.
+0 |
+5566677778888899 |
+
1 |
+0001112233334445566677788899999 |
+
2 |
+1122234556799 |
+
3 |
+02334579 |
+
4 |
+00013567889 |
+
5 |
+014588 |
+
6 |
+0013334779 |
+
7 |
+002466 |
+
8 |
+9 |
+
9 |
+000159 |
+
10 |
+267 |
+
11 |
+12448 |
+
12 |
+38 |
+
13 |
+139 |
+
14 |
++ |
15 |
+7 |
+
16 |
+9 |
+
17 |
+8 |
+
18 |
+0 |
+
19 |
+0028 |
+
20 |
+0 |
+
21 |
+56 |
+
22 |
+0 |
+
23 |
+247 |
+
24 |
++ |
25 |
++ |
26 |
+78 |
+
27 |
+4667 |
+
28 |
+26 |
+
29 |
+0168 |
+
30 |
+0 |
+
31 |
+1 |
+
32 |
+7 |
+
33 |
++ |
34 |
++ |
35 |
++ |
36 |
++ |
37 |
+88 |
+
The left-hand column in Figure 2.9 lists the stem values in the data, from the smallest (0) to the largest (37). Each data value with the same stem is represented on the same line by its leaf, i.e. its last digit. Thus the smallest value, 0.5 for Sierra Leone, is shown as a leaf “5” on the “0” stem, East Timor (another 0.5) as another “5” next to it, and so on up to the largest value 37.8 for Norway, shown as an “8” leaf on the “37” stem.
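Constructing such a plot by hand follows exactly this recipe; a small Python sketch of it (purely illustrative, with a made-up list of one-decimal values and a helper name of our own choosing) might look as follows.

```python
from collections import defaultdict

def stem_and_leaf(values):
    """Print a stem and leaf plot for values recorded to one decimal place:
    the integer part is the stem, the decimal digit is the leaf."""
    leaves = defaultdict(list)
    for v in sorted(values):
        stem = int(v)
        leaf = int(round(10 * (v - stem)))
        leaves[stem].append(str(leaf))
    for stem in range(min(leaves), max(leaves) + 1):
        print(f"{stem:3d} | {''.join(leaves[stem])}")

stem_and_leaf([0.5, 0.5, 0.6, 1.9, 4.7, 11.4, 23.7, 37.8, 37.8])
```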
The stem and leaf plot is very similar to a histogram (try turning Figure 2.9 on its side, and compare to Figure 2.4). It has the additional advantage that it also shows the actual numerical values of the observations. In some rather special cases this can reveal additional features of the data. Consider, for example, the plot shown in Figure 2.10. The variable here is the number of hours 86 respondents in a social survey (a small subset of all the respondents, drawn purely for this illustration) reported their spouse worked in the previous week. An obvious feature of the plot is the prevalence of zeroes as the leaves, especially the many observations with 40 reported hours. This suggests that most respondents probably did not carefully recall and add up the exact hours their spouses worked the previous week; instead, a round “40” is likely to be effectively a synonym for “my spouse has a regular nine-to-five job”. Such digit preference is quite common for many variables in surveys, and serves as a reminder that our measurements are not always as precise as they may appear.
+1 |
+55 |
+
2 |
+0000000555 |
+
3 |
+00002222556889 |
+
4 |
+000000000000000000000000000000255556888 |
+
5 |
+000000355 |
+
6 |
+000000555 |
+
7 |
+022 |
+
A box plot differs from the graphs discussed so far in that it does not attempt to display the whole distribution, but only certain characteristics of it. The quantities included in a box plot are some of the summary statistics defined in Section 2.6. To introduce the idea, one box plot is shown in Figure 2.5. The variable considered here is again GDP per capita. The vertical axis shows possible values of the variable, and the plot itself contains the following elements:
- The line inside the central box is the median of the variable. Here it is 4.7.

- The end points of the box are the first and third quartile of the variable, here 1.7 and 11.4 respectively. The length of the box is thus the interquartile range (IQR), here \(\text{IQR}=11.4-1.7=9.7\). The range of values covered by the box contains the middle 50% of the observations. Half of the countries in this sample have GDPs between $1700 and $11400.

- The two lines extending from the box on either side are known as the whiskers. Their length is determined as follows:

    - Calculate the value of 1.5 times the IQR. This is the maximum length of each whisker. Here this is \(1.5\times 9.7=14.6\).

    - The lower whisker extends to the smallest value (minimum) of the variable in the sample, or to the smallest value which is at most 1.5\(\times\)IQR units below the first quartile, whichever is larger. Here the minimum is 0.5, which is less than 14.6 units below the first quartile of 1.7, so the lower whisker ends at 0.5.

    - The upper whisker extends to the largest value (maximum) in the sample, or to the largest value which is at most 1.5\(\times\)IQR units above the third quartile, whichever is smaller. Here the maximum is 37.8, which is further than the maximum distance of 14.6 above the third quartile of 11.4 allowed for a whisker. Thus the upper whisker could be drawn at most to \(11.4+14.6=26\). In this sample there are actually no observations of exactly 26, so the whisker ends at the next smallest observed value, which is 23.7.

- If the minimum is further than 1.5\(\times\)IQR below the first quartile, or the maximum further than 1.5\(\times\)IQR above the third quartile, there are still observations which are not in the range spanned by the box and the whiskers. Such extreme observations are considered outliers in the plot. The values for each outlier are plotted separately as points. Here there are 15 different outlying values, all with large values of the variable (because in two cases two countries have the same value, these 15 points actually represent 17 countries).
A box plot thus shows some of the main features of a distribution with the following visual cues:

- The central line shows a central value (the median) of the distribution.

- The box shows the location of the central bulk (middle 50%) of the observations.

- The whiskers show the range of the regular (non-outlying) observations.

- Very extreme values (outliers), if any, are shown individually.
This can be quite effective for summarizing a distribution. For example, a box plot where the median line is not roughly in the middle of the box, or where one of the whiskers is much longer than the other, indicates that the sample distribution is skewed in the direction of the longer half of the box and the longer whisker. Here the distribution of GDP per capita is clearly positively skewed, as we have already observed. However, for a single distribution all such information and more can also be obtained from a histogram. It is instead for comparisons of distributions between two or more samples that box plots are particularly convenient, because it is easy to place two or more of them side by side. This will be illustrated later in Section 7.2.1.
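The rules above translate directly into a few lines of code. The sketch below (again an illustrative Python aside with a made-up data vector and a helper name of our own; note also that different packages use slightly different conventions for the quartiles, so the box ends may not agree exactly with SPSS) computes the quantities that a box plot would display.

```python
import numpy as np

def box_plot_elements(values, whisker_factor=1.5):
    """Quantities drawn in a box plot: median, quartiles, whisker ends
    (at most 1.5 x IQR beyond the box) and any outliers beyond them."""
    x = np.sort(np.asarray(values, dtype=float))
    q1, median, q3 = np.percentile(x, [25, 50, 75])
    iqr = q3 - q1
    low_limit = q1 - whisker_factor * iqr
    high_limit = q3 + whisker_factor * iqr
    lower_whisker = x[x >= low_limit].min()    # smallest non-outlying value
    upper_whisker = x[x <= high_limit].max()   # largest non-outlying value
    outliers = x[(x < low_limit) | (x > high_limit)]
    return {"median": median, "Q1": q1, "Q3": q3,
            "whiskers": (lower_whisker, upper_whisker), "outliers": outliers}

gdp = [0.5, 0.8, 1.9, 2.3, 4.7, 5.6, 9.0, 11.2, 19.0, 23.7, 31.1, 37.8]
print(box_plot_elements(gdp))
# matplotlib's plt.boxplot(gdp, whis=1.5) draws a plot based on the same rules
```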
Other types of graphs that are not described here are also sometimes used for displaying distributions. One of them is a pie chart, which shows the proportions of the levels of a categorical (or grouped continuous) variable as sectors of a circle. The relative area of a sector indicates the proportion of the category. We will not discuss pie charts further here, because we do not find them particularly useful (the same information can usually be presented more clearly in a table, bar chart or histogram). That, however, is partly a matter of taste, and there is nothing inherently wrong with (clearly and properly presented) pie charts.
The tabular and graphical methods discussed above aim to display the whole sample distribution of a variable in an understandable form. The methods introduced in this section have a different purpose. Each of them is used to summarize some important single feature of the distribution in one number. In general, any such number calculated from the data is called a statistic. When it is used for data description, it is a descriptive statistic, also known as a summary statistic. This creates some terminological confusion, as the phrase “descriptive statistics” can mean either all statistical methods used for description or those statistics (i.e. numbers calculated from the data) with a descriptive purpose. The difference is usually unimportant or clear from the context.
The two salient features of a distribution for which we will define descriptive statistics are its central tendency and its variation.
If you were allowed to know only one feature of the sample distribution of a variable, chances are you would ask for something like its most typical value, the middle value, or the average value — in short, you would be interested in a measure of central tendency. We will discuss three such measures below: the mode, the median and the mean (corresponding, respectively, to the phrases “most typical”, “middle” and “average” used above).
The mode is the value of the variable which occurs most often in the data, i.e. the one with the highest frequency. For example, Tables 2.1 and 2.2 show that the mode of the region variable in the country data is “Africa” and the mode of the democracy score is 0. The GDP variable has two modes, 0.8 and 1.9, which both appear five times (a distribution can have several modes; one with two modes is said to be bimodal).
The mode can be used for variables of any measurement level. For nominal variables it is the only available measure of central tendency, as the median and the mean are not appropriate for such variables.
The mode need not be a central value in the usual sense: it can even be the largest or smallest value of the variable, if this occurs most often. This is the case for the democracy index in our example.
The mode is most useful for categorical variables, where the number of possible values is small, and the most common value thus has a high frequency. With continuous variables (like GDP) and discrete variables with many different values, the mode may be unstable and misleading. For example, it is perfectly possible that all but one value appear once each in a sample, and the mode is the value which happens to occur twice.
Suppose that the values of a variable in a sample are first ordered from the smallest to the largest. For example, in Table 2.14 the countries are ordered in this way according to their GDP (starting from the bottom of the table). The median is the value which falls in the middle of this ordering, so that it divides the observed values into two halves. Because this requires a meaningful ordering of the values, the median is appropriate only for ordinal and interval-level variables, but not for nominal ones.
More specifically, suppose that there are \(n\) observations, indexed from 1 for the smallest to \(n\) for the largest. The index of the middle observation is then \((n+1)/2\). If \(n\) is an odd number, the median is simply the observation in the ordered sample with this index. If \(n\) is even, \((n+1)/2\) falls between two whole numbers, and the median is the mean (of the kind defined below) of the observations with these two indices. For example, in the country data set \(n=155\) (an odd number), and \((n+1)/2=78\), so the median is the value of the 78th observation in the ordered sample; if instead there had been \(n=156\) countries, \((n+1)/2=78.5\), so the median would have been the mean of the 78th and 79th observations.
In the country data set the median of the democracy score is 6, and the median GDP is $4700 (the 78th observation in GDP order is Paraguay). In practice these are of course found using a computer package like SPSS. For an ordinal categorical variable like the democracy score the median can also be found easily from the frequency table by considering the cumulative percentages (or proportions) of the categories. These are obtained by adding up the percentages up to and including each category, as shown in the last column of Table 2.2. The median is then the category in which the cumulative percentage reaches or passes 50%. For the democracy index this happens for the score of 6, which has a cumulative percentage of 50.9%.
The mean is the best-known and most widely used measure of central tendency. It is also known as the average. To define the mean, we need to introduce our first pieces of mathematical notation. Suppose first that the variable of interest is denoted by \(Y\). In practice the variable is of course called something else, like GDP or Age or Income, but in the formulas below it is much more convenient to refer to any such variable generically by one letter (note also that the choice of the letter itself is arbitrary; for example, you may often see \(X\) used instead of \(Y\) when the mean is defined). Individual observations of \(Y\) are denoted generically by \(Y_{i}\), where the subscript \(i\) identifies a single subject. The values of \(i\) range from \(1\) to \(n\), so all of the observations in the sample are \(Y_{1}, Y_{2}, \dots, Y_{n}\), e.g. in the country example (with \(n=155\)) \(Y_{1}, Y_{2}, \dots, Y_{155}\). The ordering of the observations is arbitrary here, so it might for example be the order in which they are listed in your SPSS data file. The mean \(\bar{Y}\) (“Y-bar”) of the observations of \(Y\) in the sample is defined as \[\bar{Y} = \frac{\sum Y_{i}}{n}.\] Here \(n\) is again the sample size. The symbol \(\Sigma\) (upper-case Greek letter “Sigma”) is a summation symbol, which indicates that we calculate the sum of all \(Y_{i}\) (often this is stated more explicitly by the notation \(\sum_{i} Y_{i}\) or \(\sum_{i=1}^{n} Y_{i}\) to make it clear that the summation is over all the values of \(i\)). In other words, this formula is a concise expression for \[\bar{Y}= \frac{Y_{1}+Y_{2}+\dots+Y_{n}}{n}\] or, in English, “calculate the sum of all the observations of the variable \(Y\) in the sample, and divide this sum by the number of observations to obtain the mean of \(Y\) in the sample”. For example, for GDP per capita this calculation gives \[\bar{Y}= \frac{37.8+37.8+32.7+\dots+0.6+0.5+0.5}{155} =\frac{1335.1}{155}=8.6\] (rounded to one decimal place), i.e. mean GDP among these countries is about $8600.
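All three measures of central tendency are one-line calculations in most software. A Python illustration with a small made-up sample (using only the standard library; again an aside, not the course package):

```python
from statistics import mean, median, multimode

y = [1, 3, 3, 4, 7, 8, 9]      # a small made-up sample, already ordered

print(multimode(y))   # [3]: the most frequent value(s); there may be several modes
print(median(y))      # 4: the middle observation, at index (n + 1) / 2 = 4 here
print(mean(y))        # 5: the sum of the observations (35) divided by n = 7
```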
Because the mean requires arithmetical calculations (summation and division) on the observations \(Y_{i}\), it is strictly speaking appropriate only for interval-level variables, but not for ordinal ones, for which the numbers attached to the categories are ordered labels rather than real numbers. However, it is common to see this restriction ignored and means calculated for ordinal variables, especially when they have a large number of categories (see also the discussion under “Measurement Levels” in Section 1.2.2). For example, the mean democracy score in our sample (using the codes 0–10 as its values) is 5.4. This may be used as a summary of the central tendency of the variable, but it should not be overinterpreted, as its meaning is not quite as clear as that of, say, mean GDP.
For interval level variables the mean is by far the most commonly used measure of central tendency. It does, however, have one arguably undesirable feature. This is illustrated by the statistics for the GDP variable, as shown in Table 2.11. Its mean ($8600) is clearly much larger than the median ($4700). This is due to the shape of the distribution of GDP, as revealed by Figure 2.4 or even more clearly by the stem and leaf plot of Figure 2.9. While most of the countries are concentrated around a fairly narrow range of GDPs, there are also a number of countries with much larger GDPs. The ranges of values in the small and large ends of the values in a distribution are (for fairly obvious visual reasons) called the tails of the distribution. A distribution with a (much) longer tail in one end than the other is said to be skewed. A distribution like that of GDP in Figure 2.4, with its long tail towards the large values, is said to be skewed to the right or positively skewed. The distribution shown in panel A of Figure 2.6 is skewed to the left (negatively skewed): while the examination marks of most students are relatively high, there are some students with very low marks. A distribution which has no clear skewness in either direction, like the distribution of typical weekly working hours in panel B of Figure 2.6, is (approximately) symmetric.
Variable | Mode | Median | Mean | Range | IQR | s.d.
---|---|---|---|---|---|---
Region | Africa | * | * | * | * | *
Democracy index | 0 | 6 | 5.4\(^{\dagger}\) | 10\(^{\dagger}\) | 8\(^{\dagger}\) | 3.9\(^{\dagger}\)
GDP per capita | $800 and $1900 | $4700 | $8600 | $37300 | $9700 | $9450

(The mode, median and mean are measures of central tendency; the range, IQR and standard deviation (s.d.) are measures of variation.)
The mean is almost always further in the direction of skewness than the median. That is why the mean of the positively skewed GDP variable is larger than its median. In general, a comparison between the two statistics will reveal the direction of any skewness, and give an indication of its magnitude. When the difference is large, as it is here, it is typically sensible to report both the mean and the median.
The mean is sensitive even to individual observations far in the tails of the distribution. Such observations, which are very different (much larger or smaller) from the rest of the data, are known as outliers. Even a single outlier can, if it is extreme enough, pull the mean far towards itself, even beyond the range of all the other observations, as in the following example:
Example: A sample with an outlier
Suppose that an M.Sc. student, preparing her dissertation on elements of social capital in Canada, is examining various measures of community activities in a sample of forty municipalities in the province of Manitoba.6 As part of an initial description of these communities, she wants to summarize their populations, which are
5, 79, 143, 226, 303, 317, 384, 417, 448, 505, 524, 525, 538, 619, 621, 629, 637, 760, 801, 906, 955, 959, 964, 1047, 1111, 1152, 1457, 1491, 1722, 1907, 2079, 2405, 2723, 3950, 4012, 4032, 4183, 4427, 12602, 619544.
The outlier in this case is the city of Winnipeg, whose population of nearly 620,000 is 49 times as large as that of the next largest municipality in the sample. With it included in the sample, the mean population of the 40 municipalities is about 17000; without it, the mean for the other 39 is 1600. The two numbers give rather different pictures of the size of an “average” community in the data (similar differences would probably be observed for other variables too, so the large city would be an outlier in many respects in a study like this). The median, on the other hand, is 906 for the 39 smaller communities, and 930.5 with Winnipeg included. It is thus essentially unaffected by the outlier, basically because it is only influenced by the fact that 619,544 is bigger than the mid-point of the data, but not by how much bigger it is.
A measure of central tendency is not a complete summary of a distribution, in that there can be distributions which have the same central tendency but which are different in some other respect. To illustrate this with a hypothetical example, suppose we are studying the students in three classrooms of the same grade at a local school. Each class has 14 students, and all students have just taken the same test, graded 1 (low) to 10 (high). The marks of the students are found to be as shown in Table 2.12.
Both the mean and the median of the marks are 6 in every class. However, the classes are otherwise clearly not similar. In particular, the variation (or dispersion) of the marks is very different. There is no variation at all in Class 1 where everyone has the same score, and quite a lot of variation in Class 3, while Class 2 seems to fall between the two. To capture this, some measure of variation will be needed. Three such measures are described here. All of them strictly speaking require the variable to be measured at an interval level, because they involve calculations of differences between its values. Using them on an ordinal variable is thus subject to similar cautions as for the mean above. These measures of variation are entirely inappropriate for nominal-level variables. There are some measures which can be used for such variables, but they are not described here.
Class | Marks
---|---
Class 1 | 6 6 6 6 6 6 6 6 6 6 6 6 6 6
Class 2 | 4 4 5 5 5 6 6 6 6 7 7 7 8 8
Class 3 | 1 2 2 3 4 4 4 8 8 9 9 10 10 10
The range of a variable is simply the difference between its largest and smallest observed values (the maximum and minimum in statistical terminology). In the class example above,
Class 1: Range \(= 6-6 = 0\)

Class 2: Range \(= 8-4 = 4\)

Class 3: Range \(= 10-1 = 9\)
The measure is largest for Class 3 and smallest for Class 1, so it seems to capture the differences in variation suggested by an initial look at the numbers themselves. For Class 1 the range is 0, because all of the observations are the same. In general, any sensible measure of variation should be zero when there is no variation (all observations are identical), and all of the measures described here have that property.
In the country data, the range of GDP is $37800−$500=$37300, and the range of the democracy score (if we cautiously treat it as an interval-level variable) is 10−0=10.
The range is often not a particularly useful measure of variation, because it depends only on the two extremes of the data. It is thus very sensitive to outliers. If, for example, there is one large outlier, the range will be large even if all of the other observations are very similar to each other.
One way to reduce the effects of outliers is to ignore the tails of the distribution and consider the variation only among the central range of the data. This idea is expressed in the interquartile range. First we have to define the quartiles:
- The first quartile is the value such that 25% (one quarter) of the observations are smaller than (or equal to) it, and 75% (three quarters) bigger than (or equal to) it.

- The third quartile is the value such that 75% of the observations are smaller than (or equal to) it, and 25% bigger than (or equal to) it.
The quartiles are thus similar in spirit to the median. Just as the median divides the observations into two equal halves (those below and those above the median), the quartiles divide them into two groups at different points. For example, the first quartile divides the observations into the smallest 25% and the remaining largest 75%. (The median can thus also be described as the second quartile, and all of these statistics are special cases of a larger class of similar statistics known as percentiles.)
The interquartile range (IQR) is the difference between the third and the first quartile. It is the range of the middle 50% of the observations, leaving out the smallest 25% and the largest 25%. This effectively eliminates the effects of any outliers, so the IQR is a useful measure of variation (often used together with the median as a measure of central tendency) when there are serious outliers or when the distribution is very skewed.
For the class example the interquartile ranges are
Class 1: IQR \(= 6-6 = 0\)

Class 2: IQR \(= 7-5 = 2\)

Class 3: IQR \(= 9.25-2.75 = 6.5\)
These are again in the expected order.7 For the country data, the first and third quartiles for GDP are 1.7 and 11.4 respectively, and IQR \(=11.4-1.7=9.7\). For the democracy score the quartiles are 1 and 9, and IQR \(=8\).
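In code the quartiles come from a quantile function. The sketch below (a Python standard-library aside; note that, as mentioned in the footnote, different packages use slightly different conventions for the quartiles, so other software may give slightly different values) reproduces the Class 3 figures quoted above.

```python
from statistics import quantiles

class3 = [1, 2, 2, 3, 4, 4, 4, 8, 8, 9, 9, 10, 10, 10]   # marks from Table 2.12

q1, q2, q3 = quantiles(class3, n=4)   # quartiles under the default convention
print(q1, q3, q3 - q1)                # 2.75 9.25 6.5 (other conventions may differ)
```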
The most commonly used measure of variation is based on the deviations \[Y_{i}-\bar{Y}\] where \(Y_{i}\) again denotes an individual observation of a variable, and \(\bar{Y}\) is its mean. A deviation is the difference between an individual observation and the average value in the sample. Table 2.13 shows the deviations for Class 3 in the class example, together with the other calculations discussed below. Here a negative deviation indicates that an observation is smaller than the mean of 6 (e.g. \(1-6=-5\)), and a positive deviation that an observation is larger than the mean (e.g. \(10-6=+4\)).
Student | \(Y_{i}\) | \(Y_{i}-\bar{Y}\) | \((Y_{i}-\bar{Y})^{2}\)
---|---|---|---
1 | 1 | \(-5\) | 25
2 | 2 | \(-4\) | 16
3 | 2 | \(-4\) | 16
4 | 3 | \(-3\) | 9
5 | 4 | \(-2\) | 4
6 | 4 | \(-2\) | 4
7 | 4 | \(-2\) | 4
8 | 8 | \(+2\) | 4
9 | 8 | \(+2\) | 4
10 | 9 | \(+3\) | 9
11 | 9 | \(+3\) | 9
12 | 10 | \(+4\) | 16
13 | 10 | \(+4\) | 16
\(14=n\) | 10 | \(+4\) | 16
Sum | \(\sum Y_{i}=84\) | \(\sum(Y_{i}-\bar{Y})=0\) | \(\sum(Y_{i}-\bar{Y})^{2}=152\)
 | \(\bar{Y}=84/14=6\) | \(\sum(Y_{i}-\bar{Y})/n=0\) | \(s^{2}=152/13=11.69\)
 | | | \(s=\sqrt{11.69}=3.4\)
The deviations are clearly related to variation, as a sample with little variation will have small deviations (most observations are close to the mean) and one with a lot of variation will have many large deviations (many observations are far from the mean). All that remains is to aggregate them in some sensible way into a single number.
An inappropriate summary of the deviations is their mean, i.e. \(\sum (Y_{i}-\bar{Y})/n\). In the class example this turns out to be zero (see the second column of Table 2.13), and not by coincidence. It can be shown that the mean of the deviations is in fact zero for any set of numbers. This happens because positive and negative deviations will always exactly cancel each other out in the sum. This is clearly not what we want, because a negative deviation of, say, \(-2\) (an observation two units below the mean) should be equally strong evidence of variation as a positive deviation of \(+2\) (an observation two units above the mean). The signs of the deviations thus need to be eliminated somehow. Just dropping the negative signs (so that \(-2\) becomes 2) means calculating the absolute values of the deviations, denoted \(|Y_{i}-\bar{Y}|\). Taking the mean of these gives the mean absolute deviation or MAD, defined as \[\text{MAD}=\frac{\sum |Y_{i}-\bar{Y}|}{n}.\] This is a perfectly sensible measure of variation, but it is not very commonly used. This is largely because absolute values are mathematically rather difficult to work with, and this would make MAD very inconvenient for more sophisticated analyses, where measures of variation will also be needed.8 Instead, we eliminate the signs of the deviations by using their squares \((Y_{i}-\bar{Y})^{2}\), i.e. by multiplying each deviation by itself (cf. the third column of Table 2.13 for an illustration). These are used to calculate the variance, denoted \(s^{2}\) and defined as \[s^{2} = \frac{\sum (Y_{i}-\bar{Y})^{2}}{n-1}.\] This is (apart from the \(n-1\) rather than \(n\) as the divisor) essentially the mean of the squared deviations. Its units of measurement are also squares of the units of the original measurements. For example, the variance of the GDP variable, which is itself measured in (thousands of) dollars, is expressed in dollars squared. This is rather inconvenient for any meaningful interpretation. To obtain a measure of variation expressed in the original units, we can take the square root (indicated below by \(\sqrt{\;\;}\)) of the variance. This statistic is the standard deviation, often abbreviated as S.D., denoted by \(s\) and defined as \[s = \sqrt{\frac{\sum (Y_{i}-\bar{Y})^{2}}{n-1}}.\] For the class example, this is 0 for Class 1, 1.3 for Class 2, and 3.4 for Class 3. In the country data, the standard deviation of GDP is $9450 and that of the democracy score (if it is treated as an interval-level variable) is 3.9, as shown in Table 2.11.
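Written out directly from these formulas, the calculation is a few lines of code. The Python aside below reproduces the Class 3 column sums of Table 2.13.

```python
from math import sqrt

class3 = [1, 2, 2, 3, 4, 4, 4, 8, 8, 9, 9, 10, 10, 10]   # Class 3 marks
n = len(class3)
ybar = sum(class3) / n                                    # mean: 84 / 14 = 6

squared_deviations = [(y - ybar) ** 2 for y in class3]    # these sum to 152
variance = sum(squared_deviations) / (n - 1)              # 152 / 13 = 11.69
sd = sqrt(variance)                                       # about 3.4

print(round(variance, 2), round(sd, 1))
```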
+Like the mean, the standard deviation is sensitive to outliers and +skewness of the distribution, so sometimes other measures of variation +(e.g. IQR or MAD) should be reported instead of, or in addition to it. +Nevertheless, the standard deviation is by far the most commonly used +measure of variation. One reason for this is that it is very important +not just as a descriptive statistic but also as an element in several +forms of statistical inference. For description it is typically less +immediately interpretable than measures of central tendency. Often the +most revealing descriptive uses of the standard deviation are in +comparisons between samples, like in the class example above. The +following is a real example of this kind, where variation was in fact of +more interest than central tendency:
+Example: Variation in rates of economic growth
In an article titled “Dancing in step” on November 13th 2004, The Economist discussed a set of data (collected by the J. P. Morgan Chase bank) on the annual growth rates (in percentage points) of the Gross Domestic Products (GDP) of 30 countries for each year since 1971. Measures of central tendency, such as average growth rates for each country and each year, are clearly interesting in this case. However, most of the discussion in the article concerned variation in growth rates, measured by their standard deviation across countries for each year, and especially changes in this variation over time. The standard deviation of growth rates was around 3–5 percentage points for every year until the early 1990s, had fallen to about 2 percentage points in 2003, and was forecast to decline further in subsequent years. There had thus previously been a fair amount of variation in rates of economic growth (with some economies growing faster and some slower, some perhaps being in recession), whereas recently the growth rates had become more similar across countries. The article summarized this in its subtitle as “The world’s economies are more synchronised than ever before”, and went on to discuss the implications of this development for the global economy.
The formula for the standard deviation above involves the divisor \(n-1\), where the discussion leading up to it might make you expect \(n\) instead. The reasons for this will be discussed briefly in Section 6.2.1. The definition is not entirely consistent in that some textbooks do use \(n\) instead of \(n-1\). The difference is of no great importance, and using either \(n\) or \(n-1\) would be fine for our purposes. Whenever \(n\) is even moderately large, the difference between \(n\) and \(n-1\) is in any case small, and both definitions of standard deviation give very similar values.
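Because the two versions of the formula differ only by the factor \(\sqrt{(n-1)/n}\), it is easy to see how quickly the difference becomes negligible. The short Python snippet below (again purely illustrative) evaluates this factor for a few sample sizes; already at \(n=100\) the two standard deviations differ by only about half a per cent.

```python
# Ratio of the n-divisor to the (n-1)-divisor standard deviation: sqrt((n - 1) / n)
for n in (10, 30, 100, 1000):
    print(n, round((1 - 1 / n) ** 0.5, 4))
# 10 0.9487, 30 0.9831, 100 0.995, 1000 0.9995
```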
+Finally, measures of central tendency and measures of variation, even +though they summarise the two most important features of a sample +distribution of a variable, may still miss some important features of +the distribution. Consider, for example, the class marks in Classes 2 +and 3 in our hypothetical example. These are summarized by the bar +charts of Figure 2.7. The distribution for Class 2 is +symmetric and concentrated around the mean value of 6. The most +noticeable feature of the marks in Class 3, on the other hand, is that +there appear to be two distinct groups of students, one with very low +scores and one with high scores. A similar feature was also noted in the +distribution of the democracy index in the country data (c.f. Figure +2.2). This property would not be revealed by measures of +central tendency or variation, so it is an illustration of why it is +always sensible to also examine the whole distribution of a variable +using frequency tables or graphical methods.
+Bivariate descriptive methods which are designed for situations where at +least one of the two variables is continuous are not described here but +in later sections:
+Explanatory variable is categorical and response variable +continuous: Parallel histograms, frequency polygons and box plots +(Section 7.2).
Both explanatory and response variables are continuous: Scatter +plots and line plots (Section 8.2.2).
We do not discuss the remaining possibility, where the explanatory +variable is continuous and the response is categorical. The simplest and +usually quite sufficient way to give an initial description of the +associations in this case is to group the explanatory variable into a +categorical variable and then apply the methods of Section +2.4.
+The purpose of statistical tables and graphs is to communicate +information correctly, clearly and effectively. If they do not do that, +that is, if they leave the reader misled, confused or uninformed, they +have failed and should not have been shown at all. Creating good tables +and graphics is not only a matter of understanding the technical details +described above. It also involves general principles of design and +presentation. Most of these should be simple common sense but clearly +are not, judging by the many entirely unhelpful tables and graphs +appearing in all kinds of publications. This section discusses very +briefly some principles of good practice in presenting descriptive +statistics in tables and graphs. Much of the section is based on two +books, The Visual Display of Quantitative Information by Edward +R. Tufte (Graphics Press, 1983) and Visual Revelations by Howard +Wainer (Copernicus, 1997). These can be consulted for further +information and examples of both good and bad practice.
+First, a reader of a table or graph should be able to understand what it +is about:
The variables should be labelled clearly. In particular, the names used in computer data files should not be used unless they are also understandable words. So even if a variable is called ATTDFOXH in your SPSS file, it should still be labelled “Attitude to foxhunting” or something similar in presentation. Similarly, the categories of variables should be labelled in words wherever appropriate.
Items such as the columns of a table or the vertical axis of a bar +chart should also be labelled clearly (e.g. whether they are for +frequencies or percentages).
More generally, a table or figure and its caption should be +(within reason) as self-contained as possible, in that the reader +should be able to understand them with little reference to the rest +of the text for explanation (remember that tables and figures often +float, i.e. they may appear on a different page from where they are +referred to in the main text). This may also include giving the +source of the data in a note or caption to the table or figure.
Some guidelines for constructing tables are
+A table produced by software such as SPSS, although it contains the +necessary numbers, is rarely suitable for presentation directly. +Tables included in research reports should be retyped +and reformatted.
The categories of the variable should be in a sensible order. For +ordinal variables (including those obtained by grouping a continuous +one), this should obviously be the natural ordering of the +categories. For a nominal variable, the order can be chosen in +whichever way is most useful for presentation. Often it makes sense +to order categories from the largest to the smallest, typically +leaving any “Others” category last.
If only proportions or percentages are shown, the sample size \(n\) should also be reported, perhaps in a note or caption to the table. This will allow the reader to judge how informative the table is. A percentage of 20% is clearly richer information when it corresponds to a frequency of 2,000 in a sample of 10,000 than when it means 2 out of 10 observations. When \(n\) is very small, proportions and percentages should be avoided altogether: reporting 1 out of 7 as 14.3% is simply nonsensical.
Proportions and percentages can and should be rounded. It is rarely +necessary to see percentages with more than one decimal place, if +even that.
With graphs, it is always useful to bear in mind Wainer’s principle:
+The aim of good data graphics is to
+display data accurately and clearly
The way to produce bad graphs is thus to break some part of this, for +example by (1) not showing much data, (2) showing much that is not data, +(3) showing the data inaccurately, or (4) obscuring the data. Graphs +with these characteristics are a form of visual lying, distorting the +graphical cues in a plot in ways which make it difficult or impossible +to obtain accurate information from it.
+One example of a lying graph already mentioned is the “cut” bar chart +where the bars do not begin at zero. Another is the pseudo third +dimension, an example of which is shown in Figure 2.8. The +information presented in this graph is the same as that of Figure +2.1, i.e. frequencies of different regions. These are +represented by the heights of the bars. The additional information +conveyed by the apparent thickness of the bars, represented in +perspective to give an illusion of three-dimensional bars, is then — +exactly nothing. The fake third dimension represents no data, and serves +only to distort the real data that are being shown.
+We can thus give a simple instruction: using a fake third dimension like +the one in Figure 2.8 is always wrong and not acceptable under +any circumstances. This is true irrespective of the fact that such +graphs are often seen and easily (often almost automatically) produced +by software packages like Microsoft Excel. All this proves is that the +programmers of those packages have little graphical sense, or perhaps +that their companies have discovered that their customers are willing to +pay for such “features” as colourful but pointless graphs. Indeed, many +if not most of the graph styles provided by, say, Excel (exploding pie +charts, doughnuts, cones, pyramids and so on) are entirely useless for +accurate presentation of data.
+An objection sometimes offered to such a severe rule is that bad graphs +“look good”. This can be answered in two ways. First, a statistical +graphic is not a decoration, but a tool for presenting information. +Authors who confuse the two often end up displaying pretty colours and +strange shapes to hide the lack of actual information in a graph. +Second, even in an aesthetic sense a useful and accurate graph is +preferable to a bad one, in the way that any well-designed object with a +function tends to be more attractive than a badly designed one.
+What, then, is the recipe for good graphics? Mostly this is just a +matter of using basic graph types in a sensible and restrained manner, +focusing on presenting information and avoiding all distracting +decoration. Some such examples have been given earlier in this chapter. +Other types of graphs are used to illustrate associations between +variables, which we have not yet discussed. To anticipate that a little, +Figure 2.9 shows one (good but not in any way +exceptional) example of such graphs. It is a reproduction of a graph +originally published in a survey of Spain in The Economist, and shows +changes in average house prices in Spain, Germany and Britain between +1993 and 2003. Even without an introductory statistics course, the main +message of Figure 2.9 is immediately clear: increases in +Spanish house prices over the period have been comparable to those in +Britain, with prices more than doubling in both countries, and very +unlike those in Germany, where the prices have remained unchanged. Note +also that the graph distinguishes between the lines for different +countries by using different types of line. Different colours can of +course be used instead, but their differences will become obscured if +the graph is photocopied or printed in black and white.
In addition to such modest but sensible and useful basic graphs, you may sometimes encounter inspired examples of special graphs which manage to describe particular data sets in exceptionally vivid and informative ways. Some such examples are shown at http://www.datavis.ca/gallery/index.php, on the web page maintained by Michael Friendly at York University in Canada (unfortunately, however, the electronic images do not always do justice to the originals; crisper versions can be found in the books mentioned above). For example, the page shows what Edward Tufte has described as possibly “the best statistical graphic ever drawn”. This is Charles Joseph Minard’s graphical memorial, drawn in 1861, to the fate of Napoleon I’s army in their invasion of Russia in 1812. For contrast, the page also shows a number of examples of visual lying and other terrible graphs, including a mould-breaking re-interpretation of the idea of a pie chart by Fox News, and a colourful effort that Tufte has called possibly “the worst graphic ever to find its way into print”. Clearly not all pictures tell us as much as a thousand words.
+The data used for illustration throughout this chapter are given in +Table 2.14. The variables are defined as follows:
+region indicates the macro region where the country is located, +coded as 1=Africa, 2=Asia, 3=Europe, 4=Latin America, 5=Northern +America, 6=Oceania. The list of regions and the assignment of +countries to regions are those used by the UN Statistics Division +(see <unstats.un.org/unsd/methods/m49/m49.htm>).
democracy is a measure of institutionalised democracy by the +Polity IV project.9 The values refer to each country’s +classification in 2002. The variable has an 11-point scale from 0 +(lowest level of democracy) to 10 (highest). Countries coded as +being in the state of “interruption” or “interregnum” have +been omitted.
GDP is the country’s Gross Domestic Product per capita (in +thousands of U.S. dollars), adjusted for purchasing power parity. +The data were obtained from CIA’s The World Factbook 2004 +(https://www.cia.gov/library/publications/resources/the-world-factbook/). The figures refer to +slightly different years for different countries.
The data set contains those 155 countries for which recent data on all three variables were available at the time the example was created.
+Country | +R | +D | +GDP | +Country | +R | +D | +GDP | +Country | +R | +D | +GDP | +
---|---|---|---|---|---|---|---|---|---|---|---|
Norway | +3 | +10 | +37.8 | +Bulgaria | +3 | +9 | +7.6 | +Pakistan | +2 | +0 | +2.1 | +
USA | +5 | +10 | +37.8 | +Thailand | +2 | +9 | +7.4 | +Angola | +1 | +1 | +1.9 | +
Switzerland | +3 | +10 | +32.7 | +Namibia | +1 | +6 | +7.2 | +Bangladesh | +2 | +6 | +1.9 | +
Denmark | +3 | +10 | +31.1 | +Iran | +2 | +4 | +7.0 | +Cambodia | +2 | +3 | +1.9 | +
Austria | +3 | +10 | +30.0 | +Romania | +3 | +8 | +7.0 | +Sudan | +1 | +0 | +1.9 | +
Canada | +5 | +10 | +29.8 | +Tunisia | +1 | +1 | +6.9 | +Zimbabwe | +1 | +0 | +1.9 | +
Ireland | +3 | +10 | +29.6 | +Macedonia | +3 | +9 | +6.7 | +Burma | +2 | +0 | +1.8 | +
Belgium | +3 | +10 | +29.1 | +Turkey | +2 | +8 | +6.7 | +Cameroon | +1 | +1 | +1.8 | +
Australia | +6 | +10 | +29.0 | +Libya | +1 | +0 | +6.4 | +Mauritania | +1 | +0 | +1.8 | +
Netherlands | +3 | +10 | +28.6 | +Colombia | +4 | +7 | +6.3 | +Moldova | +3 | +8 | +1.8 | +
Japan | +2 | +10 | +28.2 | +Kazakhstan | +2 | +0 | +6.3 | +Mongolia | +2 | +10 | +1.8 | +
UK | +3 | +10 | +27.7 | +Panama | +4 | +9 | +6.3 | +Laos | +2 | +0 | +1.7 | +
France | +3 | +9 | +27.6 | +Belarus | +3 | +0 | +6.1 | +Gambia | +1 | +0 | +1.7 | +
Germany | +3 | +10 | +27.6 | +Algeria | +1 | +1 | +6.0 | +Uzbekistan | +2 | +0 | +1.7 | +
Finland | +3 | +10 | +27.4 | +Dominican R. | +4 | +8 | +6.0 | +Haiti | +4 | +1 | +1.6 | +
Sweden | +3 | +10 | +26.8 | +Fiji | +6 | +6 | +5.8 | +Kyrgyzstan | +2 | +1 | +1.6 | +
Italy | +3 | +10 | +26.7 | +Turkmenistan | +2 | +0 | +5.8 | +Senegal | +1 | +8 | +1.6 | +
Singapore | +2 | +2 | +23.7 | +Gabon | +1 | +0 | +5.5 | +Iraq | +2 | +0 | +1.5 | +
Taiwan | +2 | +9 | +23.4 | +Ukraine | +3 | +7 | +5.4 | +Togo | +1 | +1 | +1.5 | +
UAE | +2 | +0 | +23.2 | +Peru | +4 | +9 | +5.1 | +Cote d’Ivoire | +1 | +5 | +1.4 | +
Spain | +3 | +10 | +22.0 | +China | +2 | +0 | +5.0 | +Nepal | +2 | +1 | +1.4 | +
NZ | +6 | +10 | +21.6 | +Swaziland | +1 | +0 | +4.9 | +Uganda | +1 | +0 | +1.4 | +
Qatar | +2 | +0 | +21.5 | +El Salvador | +4 | +7 | +4.8 | +Bhutan | +2 | +0 | +1.3 | +
Greece | +3 | +10 | +20.0 | +Venezuela | +4 | +6 | +4.8 | +Djibouti | +1 | +3 | +1.3 | +
Israel | +2 | +10 | +19.8 | +Paraguay | +4 | +7 | +4.7 | +N. Korea | +2 | +0 | +1.3 | +
Cyprus | +2 | +10 | +19.2 | +Philippines | +2 | +8 | +4.6 | +Rwanda | +1 | +0 | +1.3 | +
Kuwait | +2 | +0 | +19.0 | +Albania | +3 | +7 | +4.5 | +Chad | +1 | +1 | +1.2 | +
Slovenia | +3 | +10 | +19.0 | +Jordan | +2 | +2 | +4.3 | +Mozambique | +1 | +6 | +1.2 | +
Portugal | +3 | +10 | +18.0 | +Guatemala | +4 | +8 | +4.1 | +Benin | +1 | +6 | +1.1 | +
S. Korea | +2 | +8 | +17.8 | +Egypt | +1 | +0 | +4.0 | +Burkina Faso | +1 | +2 | +1.1 | +
Bahrain | +2 | +0 | +16.9 | +Guyana | +4 | +6 | +4.0 | +C. Afr. R. | +1 | +5 | +1.1 | +
Czech R. | +3 | +10 | +15.7 | +Morocco | +1 | +0 | +4.0 | +Kenya | +1 | +8 | +1.0 | +
Hungary | +3 | +10 | +13.9 | +Jamaica | +4 | +9 | +3.9 | +Liberia | +1 | +3 | +1.0 | +
Slovakia | +3 | +9 | +13.3 | +Sri Lanka | +2 | +7 | +3.7 | +Tajikistan | +2 | +2 | +1.0 | +
Oman | +2 | +0 | +13.1 | +Armenia | +2 | +6 | +3.5 | +Mali | +1 | +6 | +.9 | +
Uruguay | +4 | +10 | +12.8 | +Azerbaijan | +2 | +0 | +3.4 | +Nigeria | +1 | +4 | +.9 | +
Estonia | +3 | +7 | +12.3 | +Ecuador | +4 | +6 | +3.3 | +Guinea-Bissau | +1 | +5 | +.8 | +
Saudi Ar. | +2 | +0 | +11.8 | +Syria | +2 | +0 | +3.3 | +Madagascar | +1 | +7 | +.8 | +
Lithuania | +3 | +10 | +11.4 | +Indonesia | +2 | +8 | +3.2 | +Niger | +1 | +4 | +.8 | +
Mauritius | +1 | +10 | +11.4 | +Lesotho | +1 | +8 | +3.0 | +Yemen | +2 | +1 | +.8 | +
Argentina | +4 | +8 | +11.2 | +Cuba | +4 | +0 | +2.9 | +Zambia | +1 | +3 | +.8 | +
Poland | +3 | +9 | +11.1 | +India | +2 | +9 | +2.9 | +Comoros | +1 | +4 | +.7 | +
S. Africa | +1 | +9 | +10.7 | +Equatorial G. | +1 | +0 | +2.7 | +Eritrea | +1 | +0 | +.7 | +
Croatia | +3 | +7 | +10.6 | +Honduras | +4 | +7 | +2.6 | +Ethiopia | +1 | +3 | +.7 | +
Latvia | +3 | +8 | +10.2 | +Georgia | +2 | +5 | +2.5 | +Congo (Br.) | +1 | +0 | +.7 | +
Trinidad | +4 | +10 | +9.5 | +Vietnam | +2 | +0 | +2.5 | +Burundi | +1 | +1 | +.6 | +
Costa Rica | +4 | +10 | +9.1 | +Bolivia | +4 | +9 | +2.4 | +Malawi | +1 | +6 | +.6 | +
Botswana | +1 | +9 | +9.0 | +Nicaragua | +4 | +8 | +2.3 | +Tanzania | +1 | +3 | +.6 | +
Malaysia | +2 | +4 | +9.0 | +Ghana | +1 | +7 | +2.2 | +East Timor | +2 | +6 | +.5 | +
Mexico | +4 | +8 | +9.0 | +PNG | +6 | +10 | +2.2 | +Sierra Leone | +1 | +5 | +.5 | +
Russia | +3 | +7 | +8.9 | +Serbia | +3 | +7 | +2.2 | ++ | + | + | + |
Brazil | +4 | +8 | +7.6 | +Guinea | +1 | +1 | +2.1 | ++ | + | + | + |
ESS Round 5: European Social Survey Round 5 Data (2010). Data file +edition 2.0. Norwegian Social Science Data Services, Norway - Data +Archive and distributor of ESS data.↩
For recent findings, see for example Svallfors, S. (ed.) (2012), +Contested Welfare States: Welfare Attitudes in Europe and Beyond. +Stanford University Press.↩
See, for example, Svallfors (1997), Words of welfare and attitudes +to redistribution: A comparison of eight western nations, European +Sociological Review, 13, 283-304; and Blekesaune and Quadagno +(2003), Public attitudes towards welfare state policies: A +comparative analysis of 24 nations, European Sociological Review, +19, 415-427.↩
Lewellen, W. G., Lease, R. G., and Schlarbaum, G. G. (1977). +“Patterns of investment strategy and behavior among individual +investors”. The Journal of Business, 50, 296–333. The +published article gave only the total sample size, the marginal +distributions of sex and age group, and conditional proportions for +the short-term gains variable given sex and age group. These were +used to create tables of frequencies separately for men and women +(assuming further that the age distribution was the same for both), +and Table 2.7 was obtained by combining these. The +resulting table is consistent with information in the article, apart +from rounding error.↩
This is a random sample of municipalities, obtained for this +illustration from the 2001 census data provided by Statistics Canada +at http://www.statcan.gc.ca.↩
There is no need to worry about how the quartile values 9.25 and +2.75 for class 3 were calculated. Different software packages may in +fact do that slightly differently; these values are from SPSS.↩
In mathematical terms, the difficulty is that the absolute value +function has no derivative at zero.↩
Monty G. Marshall and Keith Jaggers (2002). Polity IV Dataset. +\[Computer file; version p4v2002\] College Park, MD: Center for +International Development and Conflict Management, University of +Maryland.↩
Explanation of the “Table of standard normal tail probabilities” below:
+The table shows, for values of \(Z\) between 0 and 3.5, the probability that a value from the standard normal distribution is larger than \(Z\) (i.e. the “right-hand” tail probabilities).
+For negative values of \(Z\), the probability of values smaller than \(Z\) (the “left-hand” tail probability) is equal to the right-hand tail probability for the corresponding positive value of \(Z\).
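If statistical software is available, the tabulated probabilities can also be computed directly rather than read from the table. The following sketch assumes the Python scipy library is installed; it reproduces a couple of entries of the table and illustrates the rule for negative \(z\) stated above.

```python
from scipy.stats import norm

print(norm.sf(1.96))    # right-hand tail probability P(Z > 1.96), approx. 0.0250 as in the table
print(norm.sf(0.50))    # P(Z > 0.50), approx. 0.3085
print(norm.cdf(-1.96))  # left-hand tail P(Z < -1.96), equal to P(Z > 1.96)
```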
+\(z\) | +Prob. | +\(z\) | +Prob. | +\(z\) | +Prob. | +\(z\) | +Prob. | +\(z\) | +Prob. | +\(z\) | +Prob. | +
---|---|---|---|---|---|---|---|---|---|---|---|
0.00 | +0.5000 | +0.50 | +0.3085 | +1.00 | +0.1587 | +1.50 | +0.0668 | +2.00 | +0.0228 | +2.50 | +0.0062 | +
0.01 | +0.4960 | +0.51 | +0.3050 | +1.01 | +0.1562 | +1.51 | +0.0655 | +2.01 | +0.0222 | +2.52 | +0.0059 | +
0.02 | +0.4920 | +0.52 | +0.3015 | +1.02 | +0.1539 | +1.52 | +0.0643 | +2.02 | +0.0217 | +2.54 | +0.0055 | +
0.03 | +0.4880 | +0.53 | +0.2981 | +1.03 | +0.1515 | +1.53 | +0.0630 | +2.03 | +0.0212 | +2.56 | +0.0052 | +
0.04 | +0.4840 | +0.54 | +0.2946 | +1.04 | +0.1492 | +1.54 | +0.0618 | +2.04 | +0.0207 | +2.58 | +0.0049 | +
0.05 | +0.4801 | +0.55 | +0.2912 | +1.05 | +0.1469 | +1.55 | +0.0606 | +2.05 | +0.0202 | +2.60 | +0.0047 | +
0.06 | +0.4761 | +0.56 | +0.2877 | +1.06 | +0.1446 | +1.56 | +0.0594 | +2.06 | +0.0197 | +2.62 | +0.0044 | +
0.07 | +0.4721 | +0.57 | +0.2843 | +1.07 | +0.1423 | +1.57 | +0.0582 | +2.07 | +0.0192 | +2.64 | +0.0041 | +
0.08 | +0.4681 | +0.58 | +0.2810 | +1.08 | +0.1401 | +1.58 | +0.0571 | +2.08 | +0.0188 | +2.66 | +0.0039 | +
0.09 | +0.4641 | +0.59 | +0.2776 | +1.09 | +0.1379 | +1.59 | +0.0559 | +2.09 | +0.0183 | +2.68 | +0.0037 | +
0.10 | +0.4602 | +0.60 | +0.2743 | +1.10 | +0.1357 | +1.60 | +0.0548 | +2.10 | +0.0179 | +2.70 | +0.0035 | +
0.11 | +0.4562 | +0.61 | +0.2709 | +1.11 | +0.1335 | +1.61 | +0.0537 | +2.11 | +0.0174 | +2.72 | +0.0033 | +
0.12 | +0.4522 | +0.62 | +0.2676 | +1.12 | +0.1314 | +1.62 | +0.0526 | +2.12 | +0.0170 | +2.74 | +0.0031 | +
0.13 | +0.4483 | +0.63 | +0.2643 | +1.13 | +0.1292 | +1.63 | +0.0516 | +2.13 | +0.0166 | +2.76 | +0.0029 | +
0.14 | +0.4443 | +0.64 | +0.2611 | +1.14 | +0.1271 | +1.64 | +0.0505 | +2.14 | +0.0162 | +2.78 | +0.0027 | +
0.15 | +0.4404 | +0.65 | +0.2578 | +1.15 | +0.1251 | +1.65 | +0.0495 | +2.15 | +0.0158 | +2.80 | +0.0026 | +
0.16 | +0.4364 | +0.66 | +0.2546 | +1.16 | +0.1230 | +1.66 | +0.0485 | +2.16 | +0.0154 | +2.82 | +0.0024 | +
0.17 | +0.4325 | +0.67 | +0.2514 | +1.17 | +0.1210 | +1.67 | +0.0475 | +2.17 | +0.0150 | +2.84 | +0.0023 | +
0.18 | +0.4286 | +0.68 | +0.2483 | +1.18 | +0.1190 | +1.68 | +0.0465 | +2.18 | +0.0146 | +2.86 | +0.0021 | +
0.19 | +0.4247 | +0.69 | +0.2451 | +1.19 | +0.1170 | +1.69 | +0.0455 | +2.19 | +0.0143 | +2.88 | +0.0020 | +
0.20 | +0.4207 | +0.70 | +0.2420 | +1.20 | +0.1151 | +1.70 | +0.0446 | +2.20 | +0.0139 | +2.90 | +0.0019 | +
0.21 | +0.4168 | +0.71 | +0.2389 | +1.21 | +0.1131 | +1.71 | +0.0436 | +2.21 | +0.0136 | +2.92 | +0.0018 | +
0.22 | +0.4129 | +0.72 | +0.2358 | +1.22 | +0.1112 | +1.72 | +0.0427 | +2.22 | +0.0132 | +2.94 | +0.0016 | +
0.23 | +0.4090 | +0.73 | +0.2327 | +1.23 | +0.1093 | +1.73 | +0.0418 | +2.23 | +0.0129 | +2.96 | +0.0015 | +
0.24 | +0.4052 | +0.74 | +0.2296 | +1.24 | +0.1075 | +1.74 | +0.0409 | +2.24 | +0.0125 | +2.98 | +0.0014 | +
0.25 | +0.4013 | +0.75 | +0.2266 | +1.25 | +0.1056 | +1.75 | +0.0401 | +2.25 | +0.0122 | +3.00 | +0.0013 | +
0.26 | +0.3974 | +0.76 | +0.2236 | +1.26 | +0.1038 | +1.76 | +0.0392 | +2.26 | +0.0119 | +3.02 | +0.0013 | +
0.27 | +0.3936 | +0.77 | +0.2206 | +1.27 | +0.1020 | +1.77 | +0.0384 | +2.27 | +0.0116 | +3.04 | +0.0012 | +
0.28 | +0.3897 | +0.78 | +0.2177 | +1.28 | +0.1003 | +1.78 | +0.0375 | +2.28 | +0.0113 | +3.06 | +0.0011 | +
0.29 | +0.3859 | +0.79 | +0.2148 | +1.29 | +0.0985 | +1.79 | +0.0367 | +2.29 | +0.0110 | +3.08 | +0.0010 | +
0.30 | +0.3821 | +0.80 | +0.2119 | +1.30 | +0.0968 | +1.80 | +0.0359 | +2.30 | +0.0107 | +3.10 | +0.0010 | +
0.31 | +0.3783 | +0.81 | +0.2090 | +1.31 | +0.0951 | +1.81 | +0.0351 | +2.31 | +0.0104 | +3.12 | +0.0009 | +
0.32 | +0.3745 | +0.82 | +0.2061 | +1.32 | +0.0934 | +1.82 | +0.0344 | +2.32 | +0.0102 | +3.14 | +0.0008 | +
0.33 | +0.3707 | +0.83 | +0.2033 | +1.33 | +0.0918 | +1.83 | +0.0336 | +2.33 | +0.0099 | +3.16 | +0.0008 | +
0.34 | +0.3669 | +0.84 | +0.2005 | +1.34 | +0.0901 | +1.84 | +0.0329 | +2.34 | +0.0096 | +3.18 | +0.0007 | +
0.35 | +0.3632 | +0.85 | +0.1977 | +1.35 | +0.0885 | +1.85 | +0.0322 | +2.35 | +0.0094 | +3.20 | +0.0007 | +
0.36 | +0.3594 | +0.86 | +0.1949 | +1.36 | +0.0869 | +1.86 | +0.0314 | +2.36 | +0.0091 | +3.22 | +0.0006 | +
0.37 | +0.3557 | +0.87 | +0.1922 | +1.37 | +0.0853 | +1.87 | +0.0307 | +2.37 | +0.0089 | +3.24 | +0.0006 | +
0.38 | +0.3520 | +0.88 | +0.1894 | +1.38 | +0.0838 | +1.88 | +0.0301 | +2.38 | +0.0087 | +3.26 | +0.0006 | +
0.39 | +0.3483 | +0.89 | +0.1867 | +1.39 | +0.0823 | +1.89 | +0.0294 | +2.39 | +0.0084 | +3.28 | +0.0005 | +
0.40 | +0.3446 | +0.90 | +0.1841 | +1.40 | +0.0808 | +1.90 | +0.0287 | +2.40 | +0.0082 | +3.30 | +0.0005 | +
0.41 | +0.3409 | +0.91 | +0.1814 | +1.41 | +0.0793 | +1.91 | +0.0281 | +2.41 | +0.0080 | +3.32 | +0.0005 | +
0.42 | +0.3372 | +0.92 | +0.1788 | +1.42 | +0.0778 | +1.92 | +0.0274 | +2.42 | +0.0078 | +3.34 | +0.0004 | +
0.43 | +0.3336 | +0.93 | +0.1762 | +1.43 | +0.0764 | +1.93 | +0.0268 | +2.43 | +0.0075 | +3.36 | +0.0004 | +
0.44 | +0.3300 | +0.94 | +0.1736 | +1.44 | +0.0749 | +1.94 | +0.0262 | +2.44 | +0.0073 | +3.38 | +0.0004 | +
0.45 | +0.3264 | +0.95 | +0.1711 | +1.45 | +0.0735 | +1.95 | +0.0256 | +2.45 | +0.0071 | +3.40 | +0.0003 | +
0.46 | +0.3228 | +0.96 | +0.1685 | +1.46 | +0.0721 | +1.96 | +0.0250 | +2.46 | +0.0069 | +3.42 | +0.0003 | +
0.47 | +0.3192 | +0.97 | +0.1660 | +1.47 | +0.0708 | +1.97 | +0.0244 | +2.47 | +0.0068 | +3.44 | +0.0003 | +
0.48 | +0.3156 | +0.98 | +0.1635 | +1.48 | +0.0694 | +1.98 | +0.0239 | +2.48 | +0.0066 | +3.46 | +0.0003 | +
0.49 | +0.3121 | +0.99 | +0.1611 | +1.49 | +0.0681 | +1.99 | +0.0233 | +2.49 | +0.0064 | +3.48 | +0.0003 | +
df | +0.100 | +0.050 | +0.025 | +0.010 | +0.005 | +0.001 | +0.0005 | +
1 | +3.078 | +6.314 | +12.706 | +31.821 | +63.657 | +318.309 | +636.619 | +
2 | +1.886 | +2.920 | +4.303 | +6.965 | +9.925 | +22.327 | +31.599 | +
3 | +1.638 | +2.353 | +3.182 | +4.541 | +5.841 | +10.215 | +12.924 | +
4 | +1.533 | +2.132 | +2.776 | +3.747 | +4.604 | +7.173 | +8.610 | +
5 | +1.476 | +2.015 | +2.571 | +3.365 | +4.032 | +5.893 | +6.869 | +
6 | +1.440 | +1.943 | +2.447 | +3.143 | +3.707 | +5.208 | +5.959 | +
7 | +1.415 | +1.895 | +2.365 | +2.998 | +3.499 | +4.785 | +5.408 | +
8 | +1.397 | +1.860 | +2.306 | +2.896 | +3.355 | +4.501 | +5.041 | +
9 | +1.383 | +1.833 | +2.262 | +2.821 | +3.250 | +4.297 | +4.781 | +
10 | +1.372 | +1.812 | +2.228 | +2.764 | +3.169 | +4.144 | +4.587 | +
11 | +1.363 | +1.796 | +2.201 | +2.718 | +3.106 | +4.025 | +4.437 | +
12 | +1.356 | +1.782 | +2.179 | +2.681 | +3.055 | +3.930 | +4.318 | +
13 | +1.350 | +1.771 | +2.160 | +2.650 | +3.012 | +3.852 | +4.221 | +
14 | +1.345 | +1.761 | +2.145 | +2.624 | +2.977 | +3.787 | +4.140 | +
15 | +1.341 | +1.753 | +2.131 | +2.602 | +2.947 | +3.733 | +4.073 | +
16 | +1.337 | +1.746 | +2.120 | +2.583 | +2.921 | +3.686 | +4.015 | +
17 | +1.333 | +1.740 | +2.110 | +2.567 | +2.898 | +3.646 | +3.965 | +
18 | +1.330 | +1.734 | +2.101 | +2.552 | +2.878 | +3.610 | +3.922 | +
19 | +1.328 | +1.729 | +2.093 | +2.539 | +2.861 | +3.579 | +3.883 | +
20 | +1.325 | +1.725 | +2.086 | +2.528 | +2.845 | +3.552 | +3.850 | +
21 | +1.323 | +1.721 | +2.080 | +2.518 | +2.831 | +3.527 | +3.819 | +
22 | +1.321 | +1.717 | +2.074 | +2.508 | +2.819 | +3.505 | +3.792 | +
23 | +1.319 | +1.714 | +2.069 | +2.500 | +2.807 | +3.485 | +3.768 | +
24 | +1.318 | +1.711 | +2.064 | +2.492 | +2.797 | +3.467 | +3.745 | +
25 | +1.316 | +1.708 | +2.060 | +2.485 | +2.787 | +3.450 | +3.725 | +
26 | +1.315 | +1.706 | +2.056 | +2.479 | +2.779 | +3.435 | +3.707 | +
27 | +1.314 | +1.703 | +2.052 | +2.473 | +2.771 | +3.421 | +3.690 | +
28 | +1.313 | +1.701 | +2.048 | +2.467 | +2.763 | +3.408 | +3.674 | +
29 | +1.311 | +1.699 | +2.045 | +2.462 | +2.756 | +3.396 | +3.659 | +
30 | +1.310 | +1.697 | +2.042 | +2.457 | +2.750 | +3.385 | +3.646 | +
40 | +1.303 | +1.684 | +2.021 | +2.423 | +2.704 | +3.307 | +3.551 | +
60 | +1.296 | +1.671 | +2.000 | +2.390 | +2.660 | +3.232 | +3.460 | +
120 | +1.289 | +1.658 | +1.980 | +2.358 | +2.617 | +3.160 | +3.373 | +
\(\infty\) | +1.282 | +1.645 | +1.960 | +2.326 | +2.576 | +3.090 | +3.291 | +
Explanation: For example, the value 3.078 in the top left corner indicates that for a \(t\)-distribution with 1 degree of freedom the probability of values greater than 3.078 is 0.100. The last row shows critical values for the standard normal distribution.
df | 0.100 | 0.050 | 0.010 | 0.001 |
---|---|---|---|---|
1 | 2.71 | 3.84 | 6.63 | 10.828 |
2 | 4.61 | 5.99 | 9.21 | 13.816 |
3 | 6.25 | 7.81 | 11.34 | 16.266 |
4 | 7.78 | 9.49 | 13.28 | 18.467 |
5 | 9.24 | 11.07 | 15.09 | 20.515 |
6 | 10.64 | 12.59 | 16.81 | 22.458 |
7 | 12.02 | 14.07 | 18.48 | 24.322 |
8 | 13.36 | 15.51 | 20.09 | 26.124 |
9 | 14.68 | 16.92 | 21.67 | 27.877 |
10 | 15.99 | 18.31 | 23.21 | 29.588 |
11 | 17.28 | 19.68 | 24.72 | 31.264 |
12 | 18.55 | 21.03 | 26.22 | 32.909 |
13 | 19.81 | 22.36 | 27.69 | 34.528 |
14 | 21.06 | 23.68 | 29.14 | 36.123 |
15 | 22.31 | 25.00 | 30.58 | 37.697 |
16 | 23.54 | 26.30 | 32.00 | 39.252 |
17 | 24.77 | 27.59 | 33.41 | 40.790 |
18 | 25.99 | 28.87 | 34.81 | 42.312 |
19 | 27.20 | 30.14 | 36.19 | 43.820 |
20 | 28.41 | 31.41 | 37.57 | 45.315 |
25 | 34.38 | 37.65 | 44.31 | 52.620 |
30 | 40.26 | 43.77 | 50.89 | 59.703 |
40 | 51.81 | 55.76 | 63.69 | 73.402 |
50 | 63.17 | 67.50 | 76.15 | 86.661 |
60 | 74.40 | 79.08 | 88.38 | 99.607 |
70 | 85.53 | 90.53 | 100.43 | 112.317 |
80 | 96.58 | 101.88 | 112.33 | 124.839 |
90 | 107.57 | 113.15 | 124.12 | 137.208 |
100 | 118.50 | 124.34 | 135.81 | 149.449 |
+Explanation: For example, the value 2.71 in the top left corner indicates that for a \(\chi^{2}\) distribution with 1 degree of freedom the probability of values greater than 2.71 is 0.100.
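As with the normal table, these critical values can be reproduced with statistical software instead of being read from the tables. The sketch below assumes the Python scipy library is available and checks a few entries of the \(t\) and \(\chi^{2}\) tables above.

```python
from scipy.stats import t, chi2

print(t.isf(0.100, df=1))      # approx. 3.078, the top left entry of the t table
print(t.isf(0.025, df=30))     # approx. 2.042
print(t.isf(0.025, df=10**6))  # approx. 1.960, the normal value shown in the last row
print(chi2.isf(0.100, df=1))   # approx. 2.71, the top left entry of the chi-squared table
print(chi2.isf(0.001, df=20))  # approx. 45.315
```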
The title of any course should be descriptive of its contents. This one is called MY464: Introduction to Quantitative Analysis.
+Every part of this tells us something about the nature of the course:
+The M stands for Methodology of social research. Here research +refers to activities aimed at obtaining new knowledge about the world, +in the case of the social sciences the social world of people and +their institutions and interactions. Here we are concerned solely with +empirical research, where such knowledge is based on information +obtained by making observations on what goes on in that world. There are many +different ways (methods) of making such observations, some better than +others for deriving valid knowledge. “Methodology” refers both to the +methods used in particular studies, and the study of research methods in +general.
+The word analysis indicates the area of research methodology that +the course is about. In general, any empirical research project will +involve at least the following stages:
+Identifying a research topic
Formulating research questions
Deciding what kinds of information to collect to try to answer the +research questions, and deciding how to collect it and where to +collect it from
Collecting the information
Analysing the information in appropriate ways to answer the +research questions
Reporting the findings
The empirical information collected in the research process is often +referred to as data. This course is mostly about some basic methods +for step 5, the analysis of such data.
+Methods of analysis, however competently used, will not be very useful +unless other parts of the research process have also been carried out +well. These other parts, which (especially steps 2–4 above) can be +broadly termed research design are covered on other courses. Here we +will mostly not consider research design, in effect assuming that we start +at a point where we want to analyse some data which have been collected in +a sensible way to answer meaningful research questions. However, you +should bear in mind throughout the course that in a real research +situation both good design and good analysis are essential for success.
+The word quantitative in the title of the course indicates that the +methods you will learn here are used to analyse quantitative data. This +means that the data will enter the analysis in the form of numbers of +some kind. In social sciences, for example, data obtained from +administrative records or from surveys using structured interviews are +typically quantitative. An alternative is qualitative data, which are +not rendered into numbers for the analysis. For example, unstructured +interviews, focus groups and ethnography typically produce mostly +qualitative data. Both quantitative and qualitative data are important +and widely used in social research. For some research questions, one or +the other may be clearly more appropriate, but in many if not most cases +the research would benefit from collecting both qualitative and +quantitative data. This course will concentrate solely on quantitative +data analysis, while the collection and analysis of qualitative data are +covered on other courses (e.g. MY421, MY426 and MY427), which we hope +you will also be taking.
+All the methods taught here, and almost all approaches used for +quantitative data analysis in the social sciences in general, are +statistical methods. The defining feature of such methods is that +randomness and probability play an essential role in them; some of the +ways in which they do so will become apparent later, others need not +concern us here. The title of the course could thus also have included +the word statistics. However, the Department of Methodology courses on +statistical methods (e.g. MY464, MY465, MY452, MY455 and MY459) have +traditionally been labelled as courses on “quantitative analysis” rather +than “statistics”. This is done to indicate that they differ from +classical introductory statistics courses in some ways, especially in +the presentation being less mathematical.
+The course is called an “Introduction to Quantitative Analysis” +because it is an introductory course which does not assume that you have +learned any statistics before. MY464 or a comparable course should be +taken before more advanced courses on quantitative methods. Statistics +is a cumulative subject where later courses build on material learned on +earlier ones. Because MY464 is introductory, it will start with very +simple methods, and many of the more advanced (and powerful) ones will +only be covered on the later courses. This does not, however, mean that +you are wasting your time here even if it is methods from, say, MY452 +that you will eventually need most: understanding the material of this +course is essential for learning more advanced methods.
+At the end of the course you should be familiar with certain methods of +statistical analysis. This will enable you to be both a user and a +consumer of statistics:
+You will be able to use the methods to analyse your own data and to +report the results of the analyses.
Perhaps even more importantly, you will also be able to understand +(and possibly criticize) their use in other people’s research. +Because interpreting results is typically somewhat easier than +carrying out new analyses, and because all statistical methods use +the same basic ideas introduced here, you will even have some +understanding of many of the techniques not discussed on +this course.
Another pair of different but complementary aims of the course is that +MY464 is both a self-contained unit and a prerequisite for courses that +follow it:
+If this is the last statistics course you will take, it will enable +you to understand and use the particular methods covered here. This +includes the technique of linear regression modelling (described in +Chapter 8), which is arguably the most important +and commonly used statistical method of all. This course can, +however, introduce only the most important elements of linear +regression, while some of the more advanced ones are discussed only +on MY452.
The ideas learned on this course will provide the conceptual +foundation for any further courses in quantitative methods that you +may take. The basic ideas will then not need to be learned from +scratch again, and the other courses can instead concentrate on +introducing further, ever more powerful statistical methods for +different types of data.
Like any discipline, statistics involves some special terminology which +makes it easier to discuss its concepts with sufficient precision. Some +of these terms are defined in this section, while others will be +introduced later when they are needed.
+You should bear in mind that all terminology is arbitrary, so there may +be different terms for the same concept. The same is true of notation +and symbols (such as \(n\), \(\mu\), \(\bar{Y}\), \(R^{2}\), and others) which +will be introduced later. Some statistical terms and symbols are so well +established that they are almost always used in the same way, but for +many others there are several versions in common use. While we try to be +consistent with the notation and terminology within this coursepack, we +cannot absolutely guarantee that we will not occasionally use different +terms for the same concept even here. In other textbooks and in research +articles you will certainly occasionally encounter alternative +terminology for some of these concepts. If you find yourself confused by +such differences, please come to the advisory hours or ask your class +teacher for clarification.
+Table 1.1 shows a small set of quantitative data. Once +collected, the data are typically arranged and stored in this kind of +spreadsheet-type rectangular table, known as a data matrix. In the +computer classes you will see data in this form in SPSS.
+Id | +age | +sex | +educ | +wrkstat | +life | +income4 | +pres92 | +
---|---|---|---|---|---|---|---|
1 | +43 | +1 | +11 | +1 | +2 | +3 | +2 | +
2 | +44 | +1 | +16 | +1 | +3 | +3 | +1 | +
3 | +43 | +2 | +16 | +1 | +3 | +3 | +2 | +
4 | +78 | +2 | +17 | +5 | +3 | +4 | +1 | +
5 | +83 | +1 | +11 | +5 | +2 | +1 | +1 | +
6 | +55 | +2 | +12 | +1 | +2 | +99 | +1 | +
7 | +75 | +1 | +12 | +5 | +2 | +1 | +0 | +
8 | +31 | +1 | +18 | +1 | +3 | +4 | +2 | +
9 | +54 | +2 | +18 | +2 | +3 | +1 | +1 | +
10 | +23 | +2 | +15 | +1 | +2 | +3 | +3 | +
11 | +63 | +2 | +4 | +5 | +1 | +1 | +1 | +
12 | +33 | +2 | +10 | +4 | +3 | +1 | +0 | +
13 | +39 | +2 | +8 | +7 | +3 | +1 | +0 | +
14 | +55 | +2 | +16 | +1 | +2 | +4 | +1 | +
15 | +36 | +2 | +14 | +3 | +2 | +4 | +1 | +
16 | +44 | +2 | +18 | +2 | +3 | +4 | +1 | +
17 | +45 | +2 | +16 | +1 | +2 | +4 | +1 | +
18 | +36 | +2 | +18 | +1 | +2 | +99 | +1 | +
19 | +29 | +1 | +16 | +1 | +3 | +3 | +1 | +
20 | +30 | +2 | +14 | +1 | +2 | +2 | +1 | +
:(#tab:t-datamatrix)An example of a small data matrix based on data from the U.S. General Social Survey (GSS), showing measurements of seven +variables for 20 respondents in a social survey. The variables are +defined as age: age in years; sex: sex (1=male; 2=female); educ: +highest year of school completed; wrkstat: labour force status +(1=working full time; 2=working part time; 3=temporarily not working; +4=unemployed; 5=retired; 6=in education; 7=keeping house; 8=other); +life: is life exciting or dull? (1=dull; 2=routine; 3=exciting); +income4: total annual family income (1=$24,999 or less; +2=$25,000–$39,999; 3=$40,000–$59,999; 4=$60,000 or more; 99 +indicates a missing value); pres92: vote in the 1992 presidential +election (0=did not vote or not eligible to vote; 1=Bill Clinton; +2=George H. W. Bush; 3=Ross Perot; 4=Other).
+The rows (moving downwards) and columns (moving left to right) of a data +matrix correspond to the first two important terms: the rows to the +subjects and the columns to the variables in the data.
+A subject is the smallest unit yielding information in +the study. In the example of Table 1.1, the subjects +are individual people, as they are in very many social +science examples. In other cases they may instead be families, +companies, neighbourhoods, countries, or whatever else is relevant +in a particular study. There is also much variation in the term +itself, so that instead of “subjects”, a study might refer to +“units”, “elements”, “respondents” or “participants”, or simply to +“persons”, “individuals”, “families” or “countries”, for example. +Whatever the term, it is usually clear from the context what the +subjects are in a particular analysis.
+The subjects in the data of Table 1.1 are uniquely +identified only by a number (labelled “Id”) assigned by the +researcher, as in a survey like this their names would not typically +be recorded. In situations where the identities of individual +subjects are available and of interest (such as when they are +countries), their names would typically be included in the +data matrix.
A variable is a characteristic which varies between subjects. +For example, Table 1.1 contains data on seven +variables — age, sex, education, labour force status, attitude to +life, family income and vote in a past election — defined and +recorded in the particular ways explained in the caption of +the table. It can be seen that these are indeed “variable” in that +not everyone has the same value of any of them. It is this variation +that makes collecting data on many subjects necessary +and worthwhile. In contrast, research questions about +characteristics which are the same for every subject +(i.e. constants rather than variables) are rare, usually not +particularly interesting, and not very difficult to answer.
+The labels of the columns in Table 1.1 (age, +wrkstat, income4 etc.) are the names by which the variables are +uniquely identified in the data file on a computer. Such concise +titles are useful for this purpose, but should be avoided when +reporting the results of data analyses, where clear English terms +can be used instead. In other words, a report should not say +something like “The analysis suggests that WRKSTAT of the +respondents is…” but instead something like “The analysis suggests +that the labour force status of the respondents is…”, with the +definition of this variable and its categories also clearly stated.
Collecting quantitative data involves determining the values of a set of +variables for a group of subjects and assigning numbers to these values. +This is also known as measuring the values of the variables. Here +the word “measure” is used in a broader sense than in everyday language, +so that, for example, we are measuring a person’s sex in this sense when +we assign a variable called “Sex” the value 1 if the person is male and +2 if she is female. The value assigned to a variable for a subject is +called a measurement or an observation. Our data thus consist of +the measurements of a set of variables for a set of subjects. In the +data matrix, each row contains the measurements of all the variables in +the data for one subject, and each column contains the measurements of +one variable for all of the subjects.
+The number of subjects in a set of data is known as the sample size, +and is typically denoted by \(n\). In a survey, for example, this would be +the number of people who responded to the questions in the survey +interview. In Table 1.1 we have \(n=20\). This would +normally be a very small sample size for a survey, and indeed the real +sample size in this one is several thousands. The twenty subjects here +were drawn from among them to obtain a small example which fits on a +page.
+A common problem in many studies is nonresponse or missing data, +which occurs when some measurements are not obtained. For example, some +survey respondents may refuse to answer certain questions, so that the +values of the variables corresponding to those questions will be missing +for them. In Table 1.1, the income variable is missing +for subjects 6 and 18, and recorded only as a missing value code, here +“99”. Missing values create a problem which has to be addressed somehow +before or during the statistical analysis. The easiest approach is to +simply ignore all the subjects with missing values and use only those +with complete data on all the variables needed for a given analysis. For +example, any analysis of the data in Table 1.1 which +involved the variable income4 would then exclude all the data for +subjects 6 and 18. This method of “complete-case analysis” is usually +applied automatically by most statistical software packages, including +SPSS. It is, however, not a very good approach. For example, it means +that a lot of information will be thrown away if there are many subjects +with some observations missing. Statisticians have developed better ways +of dealing with missing data, but they are unfortunately beyond the +scope of this course.
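As a purely illustrative sketch of the complete-case approach (SPSS applies the same idea automatically once a missing-value code has been declared), the following Python snippet uses the pandas library and a few rows from Table 1.1. The code 99 for income4 is first declared missing, and the affected subjects are then dropped.

```python
import pandas as pd

# A few rows from Table 1.1; 99 is the missing-value code for income4
data = pd.DataFrame({
    "Id":      [5, 6, 7, 18],
    "income4": [1, 99, 1, 99],
    "life":    [2, 2, 2, 2],
})

data["income4"] = data["income4"].replace(99, pd.NA)   # recode 99 as missing
complete = data.dropna(subset=["income4"])             # complete-case analysis
print(complete["Id"].tolist())                         # [5, 7]: subjects 6 and 18 are excluded
```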
+Information on a variable consists of the observations (measurements) of +it for the subjects in our data, recorded in the form of numbers. +However, not all numbers are the same. First, a particular way of +measuring a variable may or may not provide a good measure of the +concept of interest. For example, a measurement of a person’s weight +from a well-calibrated scale would typically be a good measure of the +person’s true weight, but an answer to the survey question “How many +units of alcohol did you drink in the last seven days?” might be a much +less accurate measurement of the person’s true alcohol consumption +(i.e. it might have measurement error for a variety of reasons). So +just because you have put a number on a concept does not automatically +mean that you have captured that concept in a useful way. Devising good +ways of measuring variables is a major part of research design. For +example, social scientists are often interested in studying attitudes, +beliefs or personality traits, which are very difficult to measure +directly. A common approach is to develop attitude scales, which +combine answers to multiple questions (“items”) on the attitude into one +number.
+Here we will again leave questions of measurement to courses on research +design, effectively assuming that the variables we are analysing have +been measured well enough for the analysis to be meaningful. Even then +we will have to consider some distinctions between different kinds of +variables. This is because the type of a variable largely determines +which methods of statistical analysis are appropriate for that variable. +It will be necessary to consider two related distinctions:
+Between different measurement levels
Between continuous and discrete variables
When a numerical value of a particular variable is allocated to a +subject, it becomes possible to relate that value to the values assigned +to other subjects. The measurement level of the variable indicates +how much information the number provides for such comparisons. To +introduce this concept, consider the variables obtained as answers to +the following three questions in the former U.K. General Household +Survey:
+[1] Are you
+single, that is, never married? | +(coded as 1) | +
married and living with your husband/wife? | +(2) | +
married and separated from your husband/wife? | +(3) | +
divorced? | +(4) | +
or widowed? | +(5) | +
[2] Over the last twelve months, would you say your health has on the
+whole been good, fairly good, or not good?
+(“Good” is coded as 1, “Fairly Good” as 2, and “Not Good” as 3.)
[3] About how many cigarettes A DAY do you usually smoke on weekdays?
+(Recorded as the number of cigarettes)
These variables illustrate three of the four possibilities in the most +common classification of measurement levels:
+A variable is measured on a nominal scale if the numbers are +simply labels for different possible values (levels or +categories) of the variable. The only possible comparison is then +to identify whether two subjects have the same or different +values of the variable. The marital status variable [1] is +measured on a nominal scale. The values of such nominal-level +variables are not in any order, so we cannot talk about one subject +having “more” or “less” of the variable than another subject; even +though “divorced” is coded with a larger number (4) than “single” +(1), divorced is not more or bigger than single in any relevant +sense. We also cannot carry out arithmetical calculations on the +values, as if they were numbers in the ordinary sense. For example, +if one person is single and another widowed, it is obviously +nonsensical to say that they are on average separated (even though +\((1+5)/2=3\)).
+The only requirement for the codes assigned to the levels of a +nominal-level variable is that different levels must receive +different codes. Apart from that, the codes are arbitrary, so that +we can use any set of numbers for them in any order. Indeed, the +codes do not even need to be numbers, so they may instead be +displayed in the data matrix as short words (“labels” for +the categories). Using successive small whole numbers +(\(1,2,3,\dots\)) is just a simple and concise choice for the codes.
+Further examples of nominal-level variables are the variables sex, +wrkstat, and pres92 in Table 1.1.
A variable is measured on an ordinal scale if its values do have +a natural ordering. It is then possible to determine not only +whether two subjects have the same value, but also whether one or +the other has a higher value. For example, the self-reported +health variable [2] is an ordinal-level variable, as larger values +indicate worse states of health. The numbers assigned to the +categories now have to be in the correct order, because otherwise +information about the true ordering of the categories would +be distorted. Apart from the order, the choice of the actual numbers +is still arbitrary, and calculations on them are still not strictly +speaking meaningful.
+Further examples of ordinal-level variables are life and income4 +in Table 1.1.
A variable is measured on an interval scale if differences in its values are comparable. One example is temperature measured on the Celsius (Centigrade) scale. It is now meaningful to state not only that 20\(^{\circ}\)C is a different and higher temperature than 5\(^{\circ}\)C, but also that the difference between them is 15\(^{\circ}\)C, and that that difference is of the same size as the difference between, say, 40\(^{\circ}\)C and 25\(^{\circ}\)C. Interval-level measurements are “proper” numbers in that calculations such as the average noon temperature in London over a year are meaningful. What we cannot do is to compare ratios of interval-level variables. Thus 20\(^{\circ}\)C is not four times as warm as 5\(^{\circ}\)C, nor is their real ratio the same as that of 40\(^{\circ}\)C and 10\(^{\circ}\)C. This is because the zero value of the Celsius scale (0\(^{\circ}\)C) is not the lowest possible temperature but an arbitrary point chosen for convenience of definition.
A variable is measured on a ratio scale if it has all the +properties of an interval-level variable and also a true zero point. +For example, the smoking variable [3] is measured on a ratio +level, with zero cigarettes as its point of origin. It is now +possible to carry out all the comparisons possible for +interval-level variables, and also to compare ratios. For example, +it is meaningful to say that someone who smokes 20 cigarettes a day +smokes twice as many cigarettes as one who smokes 10 cigarettes, +and that that ratio is equal to the ratio of 30 and 15 cigarettes.
+Further examples of ratio-level variables are age and educ in +Table 1.1.
The distinction between interval-level and ratio-level variables is in practice mostly unimportant, as the same statistical methods can be applied to both. We will thus consider them together throughout this course, and will, for simplicity, refer to variables on either scale as interval level variables. Doing so is logically coherent, because ratio level variables have all the properties of interval level variables, as well as the additional property of a true zero point.
+Similarly, nominal and ordinal variables can often be analysed with the +same methods. When this is the case, we will refer to them together as +nominal/ordinal level variables. There are, however, contexts where the +difference between them matters, and we will then discuss nominal and +ordinal scales separately.
+The simplest kind of nominal variable is one with only two possible +values, for example sex recorded as “male” or “female” or an opinion +recorded just as “agree” or “disagree”. Such a variable is said to be +binary or dichotomous. As with any nominal variable, codes for +the two levels can be assigned in any way we like (as long as different +levels get different codes), for example as 1=Female and 2=Male; later +it will turn out that in some analyses it is most convenient to use the +values 0 and 1.
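Although this course uses SPSS, the same distinctions have to be declared in any statistical software. The hypothetical Python sketch below (using the pandas library, with invented values) shows one way of recording the measurement levels of the three example variables, and why some calculations are meaningful only at certain levels.

```python
import pandas as pd

# Nominal: the codes 1-5 for marital status are only labels, with no ordering
marital = pd.Categorical([1, 5, 2, 4], categories=[1, 2, 3, 4, 5], ordered=False)
# Ordinal: the codes 1-3 for self-reported health have a natural order
health = pd.Categorical([1, 3, 2, 2], categories=[1, 2, 3], ordered=True)
# Ratio level: the number of cigarettes smoked is a genuine number
cigarettes = pd.Series([0, 20, 10, 0])

print(health.max())       # 3: a "largest" value is meaningful for an ordinal variable
print(cigarettes.mean())  # 7.5: arithmetic is meaningful at interval/ratio level
# marital.max() would raise an error: an unordered (nominal) categorical has no ordering
```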
+The distinction between ordinal-level and interval-level variables is +sometimes further blurred in practice. Consider, for example, an +attitude scale of the kind mentioned above, let’s say a scale for +happiness. Suppose that the possible values of the scale range from 0 +(least happy) to 48 (most happy). In most cases it would be most +realistic to consider these measurements to be on an ordinal rather than +an interval scale. However, statistical methods developed specifically +for ordinal-level variables do not cope very well with variables with +this many possible values. Thus ordinal variables with many possible +values (at least more than ten, say) are typically treated as if they +were measured on an interval scale.
+This distinction is based on the possible values a variable can have:
+A variable is discrete if its basic unit of measurement cannot +be subdivided. Thus a discrete variable can only have certain +values, and the values between these are logically impossible. For +example, the marital status variable [1] and the health variable +[2] defined under “Measurement Levels” in Section 1.2.2 are discrete, because +values like marital status of 2.3 or self-reported health of 1.7 are +impossible given the way the variables are defined.
A variable is continuous if it can in principle take infinitely +varied fractional values. The idea implies an unbroken scale or +continuum of possible values. Age is an example of a continuous +variable, as we can in principle measure it to any degree of +accuracy we like — years, days, minutes, seconds, micro-seconds. +Similarly, distance, weight and even income can be considered to +be continuous.
You should note the “in principle” in this definition of continuous variables above. Continuity is here a pragmatic concept, not a philosophical one. Thus we will treat age and income as continuous even though they are in practice measured to the nearest year or the nearest hundred pounds, and not in microseconds or millionths of a penny (nor is the definition inviting you to start musing on quantum mechanics and arguing that nothing is fundamentally continuous). What the distinction between discrete and continuous really amounts to in practice is the difference between variables which in our data tend to take relatively few values (discrete variables) and ones which can take lots of different values (continuous variables). This also implies that we will sometimes treat variables which are undeniably discrete in the strict sense as if they were really continuous. For example, the number of people is clearly discrete when it refers to numbers of registered voters in households (with a limited number of possible values in practice), but effectively continuous when it refers to populations of countries (with very many possible values).
+The measurement level of a variable refers to the way a characteristic +is recorded in the data, not to some other, perhaps more fundamental +version of that characteristic. For example, annual income recorded to +the nearest dollar is continuous, but an income variable (c.f. Table +1.1) with values
1 if annual income is $24,999 or less;

2 if annual income is $25,000–$39,999;

3 if annual income is $40,000–$59,999;

4 if annual income is $60,000 or more
is discrete. This kind of variable, obtained by +grouping ranges of values of an initially continuous measurement, is +common in the social sciences, where the exact values of such variables +are often not that interesting and may not be very accurately measured.
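As an illustration, the hypothetical Python snippet below (using the pandas library, with invented income values) shows how a continuous income measurement could be grouped into the four categories of income4 defined above.

```python
import pandas as pd

income = pd.Series([18000, 27500, 45000, 125000])   # hypothetical annual incomes in dollars
income4 = pd.cut(
    income,
    bins=[0, 24999, 39999, 59999, float("inf")],    # boundaries of the four categories
    labels=[1, 2, 3, 4],                            # 1 = $24,999 or less, ..., 4 = $60,000 or more
)
print(income4.tolist())                              # [1, 2, 3, 4]
```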
+The term categorical variable will be used in this coursepack to +refer to a discrete variable which has only a finite (in practice quite +small) number of possible values, which are known in advance. For +example, a person’s sex is typically coded simply as “Male” or “Female”, +with no other values. Similarly, the grouped income variable shown above +is categorical, as every income corresponds to one of its four +categories (note that it is the “rest” category 4 which guarantees that +the variable does indeed cover all possibilities). Categorical variables +are of separate interest because they are common and because some +statistical methods are designed specifically for them. An example of a +non-categorical discrete variable is the population of a country, which +does not have a small, fixed set of possible values (unless it is again +transformed into a grouped variable as in the income example above).
The distinctions between variables with different measurement levels on one hand, and continuous and discrete variables on the other, are partially related. Essentially all nominal/ordinal-level variables are discrete, and almost all continuous variables are interval-level variables. This leaves one further possibility, namely a discrete interval-level variable; the most common example of this is a count, such as the number of children in a family or the population of a country. These connections are summarized in Table 1.2.
|            | Measurement level: Nominal/ordinal | Measurement level: Interval/ratio |
|------------|------------------------------------|-----------------------------------|
| Discrete   | Many. Always categorical, i.e. having a fixed set of possible values (categories); if there are only two categories, the variable is binary (dichotomous) | Counts. If there are many different observed values, often treated as effectively continuous |
| Continuous | None | Many |
:(#tab:t-vartypes)Relationships between the types of variables discussed in this section.
+In practice the situation may be even simpler than this, in that the +most relevant distinction is often between the following two +cases:
+Discrete variables with a small number of observed values. This +includes both categorical variables, for which all possible values +are known in advance, and variables for which only a small number of +values were actually observed even if others might have been +possible.1 Such variables can be conveniently summarized in the +form of tables and handled by methods appropriate for such tables, +as described later in this coursepack. This group also includes all +nominal variables, even ones with a relatively large number of +categories, since methods for group 2. below are entirely +inappropriate for them.
Variables with a large number of possible values. This includes all +continuous variables and those interval-level or ordinal discrete +variables which have so many values that it is pragmatic to treat +them as effectively continuous.
Although there are contexts where we need to distinguish between types +of variables more carefully than this, for practical purposes this +simple distinction is often sufficient.
In the past, the subtitle of this course was “Description and inference”. This is still descriptive of the contents of the course. These words refer to two different although related tasks of statistical analysis. They can be thought of as solutions to what might be called the “too much and not enough” problems with observed data. A set of data is “too much” in that it is very difficult to understand or explain the data, or to draw any conclusions from it, simply by staring at the numbers in a data matrix. Making much sense of even a small data matrix like the one in Table 1.1 is challenging, and the task becomes entirely impossible with bigger ones. There is thus a clear need for methods of statistical description: methods of summarising the observed data in ways that make their most important features easy to see and to communicate.
The “not enough” problem is that quite often the subjects in the data are treated as representatives of some larger group which is our real object of interest. In statistical terminology, the observed subjects are regarded as a sample from a larger population. For example, a pre-election opinion poll is not carried out because we are particularly interested in the voting intentions of the particular thousand or so people who answer the questions in the poll (the sample), but because we hope that their answers will help us draw conclusions about the preferences of all of those who intend to vote on election day (the population). The job of statistical inference is to provide methods for generalising from a sample to the population: methods of drawing conclusions about the population on the basis of the observed sample, while taking into account the uncertainty that arises because only a sample of it was observed.
+Some of the methods described on this course are mainly intended for +description and others for inference, but many also have a useful role +in both.
+The simplest methods of analysis described on this course consider +questions which involve only one variable at a time. For example, the +variable might be the political party a respondent intends to vote for +in the next general election. We might then want to know what proportion +of voters plan to vote for the Labour party, or which party is likely to +receive the most votes.
However, considering variables one at a time is not going to entertain us for very long. This is because most interesting research questions involve associations between variables. One way to define an association is that two variables are associated if knowing the value of one of them helps to predict the value of the other.
+(A more careful definition will be given later.) Other ways of referring +to the same concept are that the variables are “related” or that there +is a “dependence” between them.
+For example, suppose that instead of considering voting intentions +overall, we were interested in comparing them between two groups of +people, homeowners and people who live in rented accommodation. Surveys +typically suggest that homeowners are more likely to vote for the +Conservatives and less likely to vote for Labour than renters. There is +then an association between the two (discrete) variables “type of +accommodation” and “voting intention”, and knowing the type of a +person’s accommodation would help us better predict who they intend to +vote for. Similarly, a study of education and income might find that +people with more education (measured by years of education completed) +tend to have higher incomes (measured by annual income in pounds), again +suggesting an association between these two (continuous) variables.
+Sometimes the variables in an association are in some sense on an equal +footing. More often, however, they are instead considered asymmetrically +in that it is more natural to think of one of them as being used to +predict the other. For example, in the examples of the previous +paragraph it seems easier to talk about home ownership predicting voting +intention than vice versa, and of level of education predicting income +than vice versa. The variable used for prediction is then known as an +explanatory variable and the variable to be predicted as the +response variable (an alternative convention is to talk about +independent rather than explanatory variables and dependent +instead of response variables). The most powerful statistical techniques +for analysing associations between explanatory and response variables +are known as regression methods. They are by far the most important +family of methods of quantitative data analysis. On this course you will +learn about the most important member of this family, the method of +linear regression.
+In the many research questions where regression methods are useful, it +almost always turns out to be crucially important to be able to consider +several different explanatory variables simultaneously for a single +response variable. Regression methods allow for this through the +techniques of multiple regression.
+The statistical concept of association is closely related to the +stronger concept of causation, which is at the heart of very many +research questions in the social sciences and elsewhere. The two +concepts are not the same. In particular, association is not +sufficient evidence for causation, i.e. finding that two variables are +statistically associated does not prove that either variable has a +causal effect on the other. On the other hand, association is almost +always necessary for causation: if there is no association between two +variables, it is very unlikely that there is a direct causal effect +between them. This means that analysis of associations is a necessary +part, but not the only part, of the analysis of causal effects from +quantitative data. Furthermore, statistical analysis of associations is +carried out in essentially the same way whether or not it is intended as +part of a causal argument. On this course we will mostly focus on +associations. The kinds of additional arguments that are needed to +support causal conclusions are based on information on the research +design and the nature of the variables. They are discussed only briefly +on this course, and at greater length on courses of research design such +as MY400 (and the more advanced MY457, which considers design and +analysis for causal inference together).
+We have now defined three separate distinctions between different +problems for statistical analysis, according to (1) the types of +variables involved, (2) whether description or inference is required, +and (3) whether we are examining one variable only or associations +between several variables. Different combinations of these elements +require different methods of statistical analysis. They also provide the +structure for the course, as follows:
+Chapter 2: Description for single variables of any +type, and for associations between categorical variables.
Chapter 3: Some general concepts of +statistical inference.
Chapter 4: Inference for associations between +categorical variables.
Chapter 5: Inference for single dichotomous +variables, and for associations between a dichotomous explanatory +variable and a dichotomous response variable.
Chapter 6: More general concepts of +statistical inference.
Chapter 7: Description and inference for +associations between a dichotomous explanatory variable and a +continuous response variable, and inference for single +continuous variables.
Chapter 8: Description and inference for +associations between any kinds of explanatory variables and a +continuous response variable.
Chapter 9: Some additional comments on analyses +which involve three or more categorical variables.
As well as in Chapters 3 and 6, general +concepts of statistical inference are also gradually introduced in +Chapters 4, 5 and 7, initially in +the context of the specific analyses considered in these chapters.
+Many of you will approach this course with some reluctance and +uncertainty, even anxiety. Often this is because of fears about +mathematics, which may be something you never liked or never learned +that well. Statistics does indeed involve a lot of mathematics in both +its algebraic (symbolical) and arithmetic (numerical) senses. However, +the understanding and use of statistical concepts and methods can be +usefully taught and learned even without most of that mathematics, and +that is what we hope to do on this course. It is perfectly possible to +do well on the course without being at all good at mathematics of the +secondary school kind.
+Statistics is a mathematical subject in that its concepts and methods +are expressed using mathematical formalism, and grounded in a branch of +mathematics known as probability theory. As a result, heavy use of +mathematics is essential for those who develop these methods +(i.e. statisticians). However, those who only use them (i.e. you) can +ignore most of it and still gain a solid and non-trivialised +understanding of the methods. We will thus be able to omit most of the +mathematical details. In particular, we will not show you how the +methods are derived or prove theorems about them, nor do we expect you +to do anything like that.
+We will, however, use mathematical notation whenever necessary to state +the main results and to define the methods used. This is because +mathematics is the language in which many of these results are easiest +to express clearly and accurately, and trying to avoid all mathematical +notation would be contrived and unhelpful. Most of the notation is +fairly simple and will be explained in detail. We will also interpret +such formulas in English as well to draw attention to their most +important features.
Another way of explaining statistical methods is through applied examples. These will be used throughout the course. Most of them are drawn from real data from research in a range of social sciences. If you wish to find further examples of how these methods are used in your own discipline, a good place to start is in relevant books and research journals.
+Statistical analysis involves also a lot of mathematics of the numerical +kind, i.e. various calculations on the numbers in the data. Doing such +calculations by hand or with a pocket calculator would be tedious and +unenlightening, and in any case impossible for all but the smallest +samples and simplest methods. We will mostly avoid doing that by leaving +the drudgery of calculation to computers, where the methods are +implemented in statistical software packages. This also means that you +can carry out the analyses without understanding all the numerical +details of the calculations. Instead, we can focus on trying to +understand when and why certain methods of analysis are used, and +learning to interpret their results.
+A simple pocket calculator is still more convenient than a computer for +some very simple calculations. You will also need one for this purpose +in the examination, where computers are not allowed. Any such +calculations required in the examination will be extremely simple to do +(assuming you know what you are trying to do, of course). For more +complex analyses, the exam questions will involve interpreting computer +output rather than carrying out the calculations. The homework questions +that follow the computer classes contain examples of both of these types +of questions.
The software package used in the computer classes of this course is called R/RStudio. There are other statistics packages, for example SAS, Minitab, Stata and SPSS. Any one of them could be used for the analyses on this course, and the exact choice does not matter very much. R/RStudio is convenient for our purposes, because it is free, open source, widely used, has a reasonably user-friendly interface, and is capable of powerful extensions for those who wish to pursue things further.
+Sometimes you may see a phrase such as “R Course” used apparently as +a synonym for “Statistics course”. This makes as little sense as +treating an introduction to Microsoft Word as a course on how to write +good English. It is not possible to learn quantitative data analysis +well by just sitting down in front of R or any other statistics +package and trying to figure out what all those menus are for. On the +other hand, using R/RStudio to apply statistical methods to analyse real data +is an effective way of strengthening the understanding of those methods +after they have first been introduced in lectures. That is why this +course has weekly computer classes.
+The software-specific questions on how to carry out statistical analyses +are typically of a lesser order of difficulty once the methods +themselves are reasonably well understood. In other words, once you have +a clear idea of what you want to do, finding out how to do it in R/RStudio +tends not to be that difficult. For example, in the next chapter we will +discuss the mean, one simple tool of descriptive statistics. Suppose +that you then want to calculate the mean of a variable called Age in a +data set. Learning how to do this in R is then a matter of (1) +finding a package and command to calculate a mean and (2) finding the part of the R +output where the calculated mean of Age is reported. Instructions for +steps like this for techniques covered on this course are given in the +descriptions of the corresponding computer classes.
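For instance, with base R this is a single command. In the sketch below the data frame mydata and its variable Age are hypothetical names used only for illustration; the principle is the same for any data set loaded into R.

```r
mean(mydata$Age)                 # the mean of Age
mean(mydata$Age, na.rm = TRUE)   # the same, ignoring any missing values
summary(mydata$Age)              # minimum, quartiles, mean and maximum at once
```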
There are, however, some tasks which have more to do with specific software packages than with statistics in general. For example, the fact that R/RStudio uses text commands rather than drop-down menus, and the general style of those commands, needs to be understood first. You also need to learn how to get data into R in the first place, how to manipulate the data in various ways, and how to export output from the analyses to other packages. Some instructions on how to do such things are given in the first computer class. The introduction to the computer classes also includes details of some guidebooks and other sources of information which you may find useful if you want to know more about the software.
+ +For example, suppose we collected data on the number of traffic +accidents on each of a sample of streets in a week, and suppose that +the only numbers observed were 0, 1, 2, and 3. Other, even much +larger values were clearly at least logically possible, but they +just did not occur. Of course, redefining the largest value as “3 or +more” would turn the variable into an unambiguously categorical one.↩
This chapter introduces some basic methods of analysis for continuous, interval-level variables. The main focus is on statistical inference on population means of such variables, but some new methods of descriptive statistics are also described. The discussion draws on the general ideas that have already been explained for inference in Chapters 4 and 5, and for continuous distributions in Chapter 6. Few if any new concepts thus need to be introduced here. Instead, this chapter can focus on describing the specifics of these very commonly used methods for continuous variables.
+As in Chapter 5, questions on both a single group and on +comparisons between two groups are discussed here. Now, however, the +main focus is on the two-group case. There we treat the group as the +explanatory variable \(X\) and the continuous variable of interest as the +response variable \(Y\), and assess the possible associations between \(X\) +and \(Y\) by comparing the distributions (and especially the means) of \(Y\) +in the two groups.
+The following five examples will be used for illustration throughout +this chapter. Summary statistics for them are shown in Table +7.1.
+Example 7.1: Survey data on diet
+The National Diet and Nutrition Survey of adults aged 19–64 living in +private households in Great Britain was carried out in 2000–01.28 One +part of the survey was a food diary where the respondents recorded all +food and drink they consumed in a seven-day period. We consider two +variables derived from the diary: the consumption of fruit and +vegetables in portions (of 400g) per day (with mean in the sample of +size \(n=1724\) of \(\bar{Y}=2.8\), and standard deviation \(s=2.15\)), and +the percentage of daily food energy intake obtained from fat and fatty +acids (\(n=1724\), \(\bar{Y}=35.3\), and \(s=6.11\)).
|  | \(n\) | \(\bar{Y}\) | \(s\) | Diff. |
|---|---|---|---|---|
| One sample |  |  |  |  |
| Example 7.1: Variables from the National Diet and Nutrition Survey |  |  |  |  |
| Fruit and vegetable consumption (400g portions) | 1724 | 2.8 | 2.15 |  |
| Total energy intake from fat (%) | 1724 | 35.3 | 6.11 |  |
| Two independent samples |  |  |  |  |
| Example 7.2: Average weekly hours spent on housework |  |  |  |  |
| Men | 635 | 7.33 | 5.53 |  |
| Women | 469 | 8.49 | 6.14 | 1.16 |
| Example 7.3: Perceived friendliness of a police officer |  |  |  |  |
| No sunglasses | 67 | 8.23 | 2.39 |  |
| Sunglasses | 66 | 6.49 | 2.01 | -1.74 |
| Two dependent samples |  |  |  |  |
| Example 7.4: Father’s personal well-being |  |  |  |  |
| Sixth month of wife’s pregnancy | 109 | 30.69 |  |  |
| One month after the birth | 109 | 30.77 | 2.58 | 0.08 |
| Example 7.5: Traffic flows on successive Fridays |  |  |  |  |
| Friday the 6th | 10 | 128,385 |  |  |
| Friday the 13th | 10 | 126,550 | 1176 | -1835 |
:(#tab:t-groupex)Examples of analyses of population means used in Chapter 7. Here \(n\) and \(\bar{Y}\) denote the sample size and sample mean respectively, in the two-group examples 7.2–7.5 separately for the two groups. “Diff.” denotes the between-group difference of means, and \(s\) is the sample standard deviation of the response variable \(Y\) for the whole sample (Example 7.1), of the response variable within each group (Examples 7.2 and 7.3), or of the within-pair differences (Examples 7.4 and 7.5).
+Example 7.2: Housework by men and women
+This example uses data from the 12th wave of the British Household Panel +Survey (BHPS), collected in 2002. BHPS is an ongoing survey of UK +households, measuring a range of socioeconomic variables. One of the +questions in 2002 was
+“About how many hours do you spend on housework in an average week, +such as time spent cooking, cleaning and doing the laundry?”
+The response to this question (recorded in whole hours) will be the +response variable \(Y\), and the respondent’s sex will be the explanatory +variable \(X\). We consider only those respondents who were less than 65 +years old at the time of the interview and who lived in single-person +households (thus the comparisons considered here will not involve +questions of the division of domestic work within families).29
+We can indicate summary statistics separately for the two groups by +using subscripts 1 for men and 2 for women (for example). The sample +sizes are \(n_{1}=635\) for men and \(n_{2}=469\) for women, and the sample +means of \(Y\) are \(\bar{Y}_{1}=7.33\) and \(\bar{Y}_{2}=8.49\). These and +the sample standard deviations \(s_{1}\) and \(s_{2}\) are also shown in +Table 7.1.
+Example 7.3: Eye contact and perceived friendliness of police officers
+This example is based on an experiment conducted to examine the effects +of some aspects of the appearance and behaviour of police officers on +how members of the public perceive their encounters with the police.30 +The subjects of the study were 133 people stopped by the Traffic Patrol +Division of a detachment of the Royal Canadian Mounted Police. When +talking to the driver who had been stopped, the police officer either +wore reflective sunglasses which hid his eyes, or wore no glasses at +all, thus permitting eye contact with the respondent. These two +conditions define the explanatory variable \(X\), coded 1 if the officer +wore no glasses and 2 if he wore sunglasses. The choice of whether +sunglasses were worn was made at random before a driver was stopped.
+While the police officer went back to his car to write out a report, a +researcher asked the respondent some further questions, one of which is +used here as the response variable \(Y\). It is a measure of the +respondent’s perception of the friendliness of the police officer, +measured on a 10-point scale where large values indicate high levels of +friendliness.
+The article describing the experiment does not report all the summary +statistics needed for our purposes. The statistics shown in Table +7.1 have thus been partially made up for use here. They are, +however, consistent with the real results from the study. In particular, +the direction and statistical significance of the difference between +\(\bar{Y}_{2}\) and \(\bar{Y}_{1}\) are the same as those in the published +report.
+Example 7.4: Transition to parenthood
+In a study of the stresses and feelings associated with parenthood, 109 +couples expecting their first child were interviewed before and after +the birth of the baby.31 Here we consider only data for the fathers, +and only one of the variables measured in the study. This variable is a +measure of personal well-being, obtained from a seven-item attitude +scale, where larger values indicate higher levels of well-being. +Measurements of it were obtained for each father at three time points: +when the mother was six months pregnant, one month after the birth of +the baby, and six months after the birth. Here we will use only the +first two of the measurements. The response variable \(Y\) will thus be +the measure of personal well-being, and the explanatory variable \(X\) +will be the time of measurement (sixth month of the pregnancy or one +month after the birth). The means of \(Y\) at the two times are shown in +Table 7.1. As in Example 7.3, not all of the numbers needed +here were given in the original article. Specifically, the standard +error of the difference in Table 7.1 has been made up in +such a way that the results of a significance test for the mean +difference agree with those in the article.
+Example 7.5: Traffic patterns on Friday the 13th
+A common superstition regards the 13th day of any month falling on a +Friday as a particularly unlucky day. In a study examining the possible +effects of this belief on people’s behaviour,32 data were obtained on +the numbers of vehicles travelling between junctions 7 and 8 and +junctions 9 and 10 on the M25 motorway around London during every Friday +the 13th in 1990–92. For comparison, the same numbers were also recorded +during the previous Friday (i.e. the 6th) in each case. There are only +ten such pairs here, and the full data set is shown in Table +7.2. Here the explanatory variable \(X\) indicates whether a day +is Friday the 6th (coded as 1) or Friday the 13th (coded as 2), and the +response variable is the number of vehicles travelling between two +junctions.
| Date | Junctions | Friday the 6th | Friday the 13th | Difference |
|---|---|---|---|---|
| July 1990 | 7 to 8 | 139246 | 138548 | -698 |
| July 1990 | 9 to 10 | 134012 | 132908 | -1104 |
| September 1991 | 7 to 8 | 137055 | 136018 | -1037 |
| September 1991 | 9 to 10 | 133732 | 131843 | -1889 |
| December 1991 | 7 to 8 | 123552 | 121641 | -1911 |
| December 1991 | 9 to 10 | 121139 | 118723 | -2416 |
| March 1992 | 7 to 8 | 128293 | 125532 | -2761 |
| March 1992 | 9 to 10 | 124631 | 120249 | -4382 |
| November 1992 | 7 to 8 | 124609 | 122770 | -1839 |
| November 1992 | 9 to 10 | 117584 | 117263 | -321 |
:(#tab:t-F13)Data for Example 7.5: Traffic flows between junctions of the M25 on +each Friday the 6th and Friday the 13th in 1990-92.
+In each of these cases, we will regard the variable of interest \(Y\) as a +continuous, interval-level variable. The five examples illustrate three +different situations considered in this chapter. Example 7.1 includes +two separate \(Y\)-variables (consumption of fruit and vegetables, and fat +intake), each of which is considered for a single population. Questions +of interest are about the mean of the variable in the population. This +is analogous to the one-group questions on proportions in Sections +5.5 and 5.6. In this chapter +the one-group case is discussed only relatively briefly, in Section +7.4.
The main focus here is on the case illustrated by Examples 7.2 and 7.3. These involve samples of a response variable (hours of housework, or perceived friendliness) from two groups (men and women, or police with or without sunglasses). We are then interested in comparing the distributions, and especially the means, of the response variable between the groups. This case will be discussed first. Descriptive statistics for it are described in Section 7.2, and statistical inference in Section 7.3.
+Finally, examples 7.4 and 7.5 also involve comparisons between two +groups, but of a slightly different kind than examples 7.2 and 7.3. The +two types of cases differ in the nature of the two samples (groups) +being compared. In Examples 7.2 and 7.3, the +samples can be considered to be independent. What this claim means +will be discussed briefly later; informally, it is justified in these +examples because the subjects in the two groups are separate and +unrelated individuals. In Examples 7.4 and 7.5, in contrast, the samples +(before and after the birth of a child, or two successive Fridays) must +be considered dependent, essentially because they concern +measurements on the same units at two distinct times. This case is +discussed in Section 7.5.
+In each of the four two-group examples we are primarily interested in +questions about possible association between the group variable \(X\) and +the response variable \(Y\). As before, this is the question of whether +the conditional distributions of \(Y\) are different at the two levels of +\(X\). There is thus an association between \(X\) and \(Y\) if
+Example 7.2: The distribution of hours of housework is different for +men than for women.
Example 7.3: The distribution of perceptions of a police officer’s +friendliness is different when he is wearing mirrored sunglasses +than when he is not.
Example 7.4: The distribution of measurements of personal well-being +is different at the sixth month of the pregnancy than one month +after the birth.
Example 7.5: The distributions of the numbers of cars on the +motorway differ between Friday the 6th and the following Friday +the 13th.
We denote the two values of \(X\), i.e. the two groups, by 1 and 2. The +mean of the population distribution of \(Y\) given \(X=1\) will be denoted +\(\mu_{1}\) and the standard deviation \(\sigma_{1}\), and the mean and +standard deviation of the population distribution given \(X=2\) are +denoted \(\mu_{2}\) and \(\sigma_{2}\) similarly. The corresponding sample +quantities are the conditional sample means \(\bar{Y}_{1}\) and +\(\bar{Y}_{2}\) and sample standard deviations \(s_{1}\) and \(s_{2}\). For +inference, we will focus on the population difference +\(\Delta=\mu_{2}-\mu_{1}\) which is estimated by the sample difference +\(\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}\). Some of the descriptive methods +described in Section 7.2, on the other hand, also aim to +summarise and compare other aspects of the two conditional sample +distributions.
+There is an association between the group variable \(X\) and the response +variable \(Y\) if the distributions of \(Y\) in the two groups are not the +same. To determine the extent and nature of any such association, we +need to compare the two distributions. This section describes methods of +doing so for observed data, i.e. for examining associations in a sample. +We begin with graphical methods which can be used to detect differences +in any aspects of the two distributions. We then discuss some +non-graphical summaries which compare specific aspects of the sample +distributions, especially their means.
+Although the methods of inference described later in this chapter will +be limited to the case where the group variable \(X\) is dichotomous, many +of the descriptive methods discussed below can just as easily be applied +when more than two groups are being compared. This will be mentioned +wherever appropriate. For inference in the multiple-group case some of +the methods discussed in Chapter 8 are applicable.
In Section 2.5.2 we described four graphical methods of summarizing the sample distribution of one continuous variable \(Y\): the histogram, the stem and leaf plot, the frequency polygon and the box plot. Each of these can be adapted for comparisons of two or more distributions, although some more conveniently than others. We illustrate the use of three of these plots for this purpose, using the comparison of housework hours in Example 7.2. Stem and leaf plots will not be shown, because they are less appropriate when the sample sizes are as large as they are in this example.
+Two sample distributions can be compared by displaying histograms of +them side by side, as shown in Figure 7.1. This is not +a very common type of graph, and not ideal for visually comparing the +two distributions, because the bars to be compared (here for men +vs. women) end at opposite ends of the plot. A better alternative is to +use frequency polygons. Since these represent a sample distribution by a +single line, it is easy to include two of them in the same plot, as +shown in Figure 7.2. Finally, Figure +7.3 shows two boxplots of reported housework hours, one +for men and one for women.
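For readers who want to try such plots themselves, the following sketch shows one way to produce them with base R graphics. The data frame bhps and its variables housework and sex are hypothetical names standing in for the survey data of Example 7.2, and the coding of sex as "Male"/"Female" is likewise an assumption made only for illustration.

```r
# Side-by-side box plots of housework hours for the two groups (c.f. Figure 7.3)
boxplot(housework ~ sex, data = bhps,
        ylab = "Housework hours per week")

# Two frequency polygons in the same plot (c.f. Figure 7.2)
brks    <- seq(0, max(bhps$housework) + 4, by = 4)   # common intervals of 4 hours
h_men   <- hist(bhps$housework[bhps$sex == "Male"],   breaks = brks, plot = FALSE)
h_women <- hist(bhps$housework[bhps$sex == "Female"], breaks = brks, plot = FALSE)

plot(h_men$mids, h_men$counts, type = "l",
     xlab = "Housework hours per week", ylab = "Frequency",
     ylim = range(0, h_men$counts, h_women$counts))
lines(h_women$mids, h_women$counts, lty = 2)
legend("topright", legend = c("Men", "Women"), lty = c(1, 2))
```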
+The plots suggest that the distributions are quite similar for men and +women. In both groups, the largest proportion of respondents stated that +they do between 4 and 7 hours of housework a week. The distributions are +clearly positively skewed, since the reported number of hours was much +higher than average for a number of people (whereas less than zero hours +were of course not recorded for anyone). The proportions of observations +in categories including values 5, 10, 15, 20, 25 and 30 tend to be +relatively high, suggesting that many respondents chose to report their +answers in such round numbers. The box plots show that the median number +of hours is higher for women than for men (7 vs. 6 hours), and women’s +responses have slightly less variation, as measured by both the IQR and +the range of the whiskers. Both distributions have several larger, +outlying observations (note that SPSS, which was used to produce Figure +7.3, divides outliers into moderate and “extreme” ones; +the latter are observations more than 3 IQR from the end of the box, and +are plotted with asterisks).
+Figures 7.1–7.3 also illustrate an +important general point about such comparisons. Typically we focus on +comparing means of the conditional distributions. Here the difference +between the sample means is 1.16, i.e. women in the sample spend, on +average, over an hour longer on housework per week than men. The +direction of the difference could also be guessed from Figure +7.2, which shows that somewhat smaller proportions of +women than of men report small numbers of hours, and larger proportions +of women report large numbers. This difference will later be shown to be +statistically significant, and it is also arguably relatively large in a +substantive sense.
However, it is equally important to note that the two distributions summarized by the graphs are nevertheless largely similar. For example, even though the mean is higher for women, there are clearly many women who report spending hardly any time on housework, and many men who spend a lot of time on it. In other words, the two distributions overlap to a large extent. This obvious point is often somewhat neglected in public discussions of differences between groups such as men and women or different ethnic groups. It is not uncommon to see reports of research indicating that (say) men have higher or lower values of something or other than women. Such statements usually refer to differences of averages, and are often clearly important and interesting. Less helpful, however, is the tendency to discuss the differences almost as if the corresponding distributions had no overlap at all, i.e. as if all men were higher or lower in some characteristic than all women. This is obviously hardly ever the case.
+Box plots and frequency polygons can also be used to compare more than +two sample distributions. For example, the experimental conditions in +the study behind Example 7.3 actually involved not only whether or not a +police officer wore sunglasses, but also whether or not he wore a gun. +Distributions of perceived friendliness given all four combinations of +these two conditions could easily be summarized by drawing four box +plots or frequency polygons in the same plot, one for each experimental +condition.
+Main features of sample distributions, such as their central tendencies +and variations, are described using the summary statistics introduced in +Section 2.6. These too can be compared between groups. +Table 7.1 shows such statistics for the examples of this +chapter. Tables like these are routinely reported for initial +description of data, even if more elaborate statistical methods are +later used.
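Summary statistics of this kind can be computed for each group in one step. A minimal sketch, again using the hypothetical data frame bhps of Example 7.2:

```r
# Sample size, mean and standard deviation of housework hours by sex
aggregate(housework ~ sex, data = bhps,
          FUN = function(y) c(n = length(y), mean = mean(y), sd = sd(y)))
```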
+Sometimes the association between two variables in a sample is +summarized in a single measure of association calculated from the +data. This is especially convenient when both of the variables are +continuous (in which case the most common measure of association is +known as the correlation coefficient). In this section we consider as +such a summary the difference \(\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}\) of +the sample means of \(Y\) in the two groups. These differences are also +shown in Table 7.1.
+The difference of means is important because it is also the focus of the +most common methods of inference for two-group comparisons. For purely +descriptive purposes it may be as or more convenient to report some +other statistic. For example, the difference of means of 1.16 hours in +Example 7.2 could also be described in relative terms by saying that +the women’s average is about 16 per cent higher than the men’s average +(because \(1.16/7.33=0.158\), i.e. the difference represents 15.8 % of the +men’s average).
Formulated as a statistical model in the sense discussed in Section 6.3.1, the assumptions of the analyses considered in this section are as follows:
+We have a sample of \(n_{1}\) independent +observations of a variable \(Y\) in group 1, which have a population +distribution with mean \(\mu_{1}\) and standard deviation +\(\sigma_{1}\).
We have a sample of \(n_{2}\) independent observations of \(Y\) in group +2, which have a population distribution with mean \(\mu_{2}\) and +standard deviation \(\sigma_{2}\).
The two samples are independent, in the sense discussed following Example 7.5.
For now, we further assume that the population standard deviations +\(\sigma_{1}\) and \(\sigma_{2}\) are equal, with a common value denoted +by \(\sigma\). This relatively minor assumption will be discussed +further in Section 7.3.4.
We could have stated the starting points of the analyses in Chapters +4 and 5 also in such formal terms. It is not +absolutely necessary to always do so, but we should at least remember +that any statistical analysis is based on some such model. In +particular, this helps to make it clear what our methods of analysis do +and do not assume, so that we may critically examine whether these +assumptions appear to be justified for the data at hand.
+The model stated above does not require that the population +distributions of \(Y\) should have the form of any particular probability +distribution. It is often further assumed that these distributions are +normal distributions, but this is not essential. Discussion of this +question is postponed until Section 7.3.4.
+The only new term in this model statement was the “independent” under +assumptions 1 and 2. This statistical term can be roughly translated as +“unrelated”. The condition can usually be regarded as satisfied when the +units of analysis are different entities, as in Examples 7.2 and 7.3 +where the units within each group are distinct individual people. In +these examples the individuals in the two groups are also distinct, from +which it follows that the two samples are independent as required by +assumption 3. The same assumption of independent observations is also +required by all of the methods described in Chapters 4 and +5, although we did not state this explicitly there.
+This situation is illustrated by Example 7.2, where \(Y\) is the number of +hours a person spends doing housework in a week, and the two groups are +men (group 1) and women (group 2).
+The quantity of main interest is here the difference of population means +\[\begin{equation} +\Delta=\mu_{2}-\mu_{1}. +\tag{7.1} +\end{equation}\] +In particular, if \(\Delta=0\), the population means in +the two groups are the same. If \(\Delta\ne 0\), they are not the same, +which implies that there is an association between \(Y\) and the group in +the population.
+Inference on \(\Delta\) can be carried out using methods which are +straightforward modifications of the ones introduced first in Chapter +5. For significance testing, the null hypothesis of interest +is +\[\begin{equation} +H_{0}: \; \Delta=0, +\tag{7.2} +\end{equation}\] +to be tested against a two-sided (\(H_{a}:\; \Delta\ne 0\)) +or one-sided (\(H_{a}:\; \Delta> 0\) or \(H_{a}:\; \Delta< 0\)) alternative +hypothesis. The test statistic used to test ((7.2)) is again of the +form +\[\begin{equation} +t=\frac{\hat{\Delta}}{\hat{\sigma}_{\hat{\Delta}}} +\tag{7.3} +\end{equation}\] +where \(\hat{\Delta}\) is a sample estimate of \(\Delta\), and +\(\hat{\sigma}_{\hat{\Delta}}\) its estimated standard error. Here the +statistic is conventionally labelled \(t\) rather than \(z\) and called the +t-test statistic because sometimes the \(t\)-distribution rather than +the normal is used as its sampling distribution. This possibility is +discussed in Section 7.3.4, and we can +ignore it until then.
+Confidence intervals for the differences \(\Delta\) are also of the +familiar form +\[\begin{equation} +\hat{\Delta} \pm z_{\alpha/2}\, \hat{\sigma}_{\hat{\Delta}} +\tag{7.4} +\end{equation}\] +where \(z_{\alpha/2}\) is the appropriate multiplier from +the standard normal distribution to obtain the required confidence +level, e.g. \(z_{0.025}=1.96\) for 95% confidence intervals. The +multiplier is replaced with a slightly different one if the +\(t\)-distribution is used as the sampling distribution, as discussed in +Section 7.3.4.
+The details of these formulas in the case of two-sample inference on +means are described next, in Section 7.3.2 for +the significance test and in Section 7.3.3 for the +confidence interval.
+For tests of the difference of means \(\Delta=\mu_{2}-\mu_{1}\) between +two population distributions, we consider the null hypothesis of no +difference +\[\begin{equation} +H_{0}: \; \Delta=0. +\tag{7.5} +\end{equation}\] +In the housework example, this is the hypothesis that +average weekly hours of housework in the population are the same for men +and women. It is tested against an alternative hypothesis, either the +two-sided alternative hypotheses +\[\begin{equation} +H_{a}: \; \Delta\ne 0 +\tag{7.6} +\end{equation}\] +or one of the one-sided alternative hypotheses +\[H_{a}: \Delta> 0 \text{ or } H_{a}: \Delta< 0\] In the discussion below, we concentrate on the more +common two-sided alternative.
+The test statistic for testing ((7.5)) is of the general form +((7.3)). Here it depends on the data only through the sample means +\(\bar{Y}_{1}\) and \(\bar{Y}_{2}\) and sample variances \(s_{1}^{2}\) and +\(s_{2}^{2}\) of \(Y\) in the two groups. A point estimate of \(\Delta\) is +\[\begin{equation} +\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}. +\tag{7.7} +\end{equation}\] +In terms of the population parameters, the standard +error of \(\hat{\Delta}\) is +\[\begin{equation} +\sigma_{\hat{\Delta}}=\sqrt{\sigma^{2}_{\bar{Y}_{2}}+\sigma^{2}_{\bar{Y}_{1}}}=\sqrt{\frac{\sigma^{2}_{2}}{n_{2}}+\frac{\sigma^{2}_{1}}{n_{1}}}. +\tag{7.8} +\end{equation}\] +When we assume that the population standard +deviations \(\sigma_{1}\) and \(\sigma_{2}\) are equal, with a common value +\(\sigma\), ((7.8)) simplifies to +\[\begin{equation} +\sigma_{\hat{\Delta}} =\sigma\; \sqrt{\frac{1}{n_{2}}+\frac{1}{n_{1}}}. +\tag{7.9} +\end{equation}\] +The formula of the test statistic uses an estimate of +this standard error, given by +\[\begin{equation} +\hat{\sigma}_{\hat{\Delta}} =\hat{\sigma} \; \sqrt{\frac{1}{n_{2}}+\frac{1}{n_{1}}} +\tag{7.10} +\end{equation}\] +where \(\hat{\sigma}\) is an estimate of \(\sigma\), +calculated from +\[\begin{equation} +\hat{\sigma}=\sqrt{\frac{(n_{2}-1)s^{2}_{2}+(n_{1}-1)s^{2}_{1}}{n_{1}+n_{2}-2}}. +\tag{7.11} +\end{equation}\] +Substituting ((7.7)) and ((7.10)) into +the general formula ((7.3)) gives the two-sample t-test statistic +for means +\[\begin{equation} +t=\frac{\bar{Y}_{2}-\bar{Y}_{1}} +{\hat{\sigma}\, \sqrt{1/n_{2}+1/n_{1}}} +\tag{7.12} +\end{equation}\] +where \(\hat{\sigma}\) is given by ((7.11)).
+For an illustration of the calculations, consider again the housework +Example 7.2. Here, denoting men by 1 and women by 2, \(n_{1}=635\), +\(n_{2}=469\), \(\bar{Y}_{1}=7.33\), \(\bar{Y}_{2}=8.49\), \(s_{1}=5.53\) and +\(s_{2}=6.14\). The estimated mean difference is thus +\[\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}=8.49-7.33=1.16.\] The common +value of the population standard deviation \(\sigma\) is estimated from +((7.11)) as \[\begin{aligned} +\hat{\sigma}&=& +\sqrt{\frac{(n_{2}-1)s^{2}_{2}+(n_{1}-1)s^{2}_{1}}{n_{1}+n_{2}-2}} += +\sqrt{\frac{(469-1) 6.14^{2}+(635-1) 5.53^{2}}{635+469-2}}\\ +&=& \sqrt{33.604}=5.797\end{aligned}\] and the estimated standard error +of \(\hat{\Delta}\) is given by ((7.10)) as +\[\hat{\sigma}_{\hat{\Delta}} = +\hat{\sigma} \; \sqrt{\frac{1}{n_{2}}+\frac{1}{n_{1}}} +=5.797 \; \sqrt{\frac{1}{469}+\frac{1}{635}}=0.353.\] The value of the +t-test statistic ((7.12)) is then obtained as +\[t=\frac{1.16}{0.353}=3.29.\] These values and other quantities +explained later, as well as similar results for Example 7.3, are also +shown in Table 7.3.
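The same arithmetic is easy to reproduce in R from the summary statistics alone, without access to the raw data. The sketch below is simply a transcription of formulas ((7.10))–((7.12)), not a built-in test:

```r
n1 <- 635; ybar1 <- 7.33; s1 <- 5.53   # men
n2 <- 469; ybar2 <- 8.49; s2 <- 6.14   # women

delta_hat <- ybar2 - ybar1                                              # 1.16
sigma_hat <- sqrt(((n2 - 1) * s2^2 + (n1 - 1) * s1^2) / (n1 + n2 - 2))  # pooled SD, 5.80
se_hat    <- sigma_hat * sqrt(1 / n2 + 1 / n1)                          # 0.353
t_stat    <- delta_hat / se_hat                                         # 3.29
```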
|  | \(\hat{\Delta}\) | \(\hat{\sigma}_{\hat{\Delta}}\) | \(t\) | \(P\)-value | 95 % C.I. |
|---|---|---|---|---|---|
| Example 7.2: Average weekly hours spent on housework | 1.16 | 0.353 | 3.29 | 0.001 | (0.47; 1.85) |
| Example 7.3: Perceived friendliness of a police officer | \(-1.74\) | 0.383 | \(-4.55\) | \(<0.001\) | \((-2.49; -0.99)\) |
:(#tab:t-2testsY1)Results of tests and confidence intervals for comparing means for +two independent samples. For Example 7.2, the difference of means is +between women and men, and for Example 7.3, it is between wearing and +not wearing sunglasses. The test statistics and confidence intervals +are obtained under the assumption of equal population standard +deviations, and the \(P\)-values are for a test with a two-sided +alternative hypothesis. See the text for the definitions of the +statistics.
+If necessary, calculations like these can be carried +out even with a pocket calculator. It is, however, much more convenient +to leave them to statistical software. Figure 7.4 shows +SPSS output for the two-sample t-test for the housework data. The first +part of the table, labelled “Group Statistics”, shows the sample sizes +\(n\), means \(\bar{Y}\) and standard deviations \(s\) separately for the two +groups. The quantity labelled “Std. Error Mean” is \(s/\sqrt{n}\). This is +an estimate of the standard error of the sample mean, which is the +quantity \(\sigma/\sqrt{n}\) discussed in Section 6.4.
+The second part of the table in Figure 7.4, labelled +“Independent Samples Test”, gives results for the t-test itself. The +test considered here, which assumes a common population standard +deviation \(\sigma\) (and thus also variance \(\sigma^{2}\)), is found on +the row labelled “Equal variances assumed”. The test statistic is shown +in the column labelled “\(t\)”, and the difference +\(\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}\) and its standard error +\(\hat{\sigma}_{\hat{\Delta}}\) are shown in the “Mean Difference” and +“Std. Error Difference” columns respectively. Note that the difference +(\(-1.16\)) has been calculated in SPSS between men and women rather than +vice versa as in Table 7.3, but this will make no +difference to the conclusions from the test.
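When the raw data are available, the whole analysis can also be obtained with a single R command. In the sketch below the data frame bhps and its variables housework and sex are again hypothetical names; the argument var.equal = TRUE requests the version of the test which assumes a common population standard deviation.

```r
# Two-sample t-test of housework hours between men and women,
# assuming equal population standard deviations
t.test(housework ~ sex, data = bhps, var.equal = TRUE)
# Note: R reports the difference in the order of the factor levels of sex,
# so its sign may be reversed relative to Table 7.3, as in the SPSS output.
```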
+In the two-sample situation with assumptions 1–4 at the beginning of Section 7.3.1, the sampling distribution of the t-test +statistic ((7.12)) is approximately a standard normal +distribution when the null hypothesis +\(H_{0}: \; \Delta=\mu_{2}-\mu_{1}=0\) is true in the population and the +sample sizes are large enough. This is again a consequence of the +Central Limit Theorem. The requirement for “large enough” sample sizes +is fairly easy to satisfy. A good rule of thumb is that the sample sizes +\(n_{1}\) and \(n_{2}\) in the two groups should both be at least 20 for the +sampling distribution of the test statistic to be well enough +approximated by the standard normal distribution. In the housework +example we have data on 635 men and 469 women, so the sample sizes are +clearly large enough. A variant of the test which relaxes the condition +on the sample sizes is discussed in Section +7.3.4 below.
+The \(P\)-value of the test is calculated from this sampling distribution +in exactly the same way as for the tests of proportions in Section +5.5.3. In the housework example the value +of the \(t\)-test statistic is \(t=3.29\). The \(P\)-value for testing the +null hypothesis against the two-sided alternative ((7.6)) is then +the probability, calculated from the standard normal distribution, of +values that are at least 3.29 or at most \(-3.29\). Each of these two +probabilities is about 0.0005, so the \(P\)-value is +\(0.0005+0.0005=0.001\). In the SPSS output of Figure 7.4 it +is given in the column labelled “Sig. (2-tailed)”, where “Sig.” is short +for “significance” and “2-tailed” is a synonym for “2-sided”.
The \(P\)-value can also be calculated approximately using the table of the standard normal distribution (see Table 5.2), as explained in Section 5.5.3. Here the test statistic \(t=3.29\), which is larger than the critical values 1.65, 1.96 and 2.58 for the 0.10, 0.05 and 0.01 significance levels for a two-sided test, so we can report that \(P<0.01\). Here \(t\) is by chance actually equal (to two decimal places) to the critical value for the 0.001 significance level, so we could also report \(P=0.001\). These findings agree, as they should, with the exact \(P\)-value of 0.001 shown in the SPSS output.
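The same calculation can be done directly from the standard normal distribution in R, which gives the exact two-sided P-value quoted above:

```r
2 * pnorm(-abs(3.29))   # two-sided P-value for t = 3.29; approximately 0.001
```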
+In conclusion, the two-sample \(t\)-test in Example 7.2 indicates that +there is very strong evidence (with \(P=0.001\) for the two-sided test) +against the claim that the hours of weekly housework are on average the +same for men and women in the population.
+Here we showed raw SPSS output in Figure 7.4 because we +wanted to explain its contents and format. Note, however, that such +unedited computer output is rarely if ever appropriate in research +reports. Instead, results of statistical analyses should be given in +text or tables formatted in appropriate ways for presentation. See Table +7.3 and various other examples in this coursepack and +textbooks on statistics.
+To summarise the elements of the test again, we repeat them briefly, now +for Example 7.3, the experiment on the effect of eye contact on the +perceived friendliness of police officers (c.f. Table 7.1 for the summary statistics):
+Data: samples from two groups, one with the experimental condition +where the officer wore no sunglasses, with sample size \(n_{1}=67\), +mean \(\bar{Y}_{1}=8.23\) and standard deviation \(s_{1}=2.39\), and the +second with the experimental condition where the officer did wear +sunglasses, with \(n_{2}=66\), \(\bar{Y}_{2}=6.49\) and \(s_{2}=2.01\).
Assumptions: the observations are random samples of statistically independent observations from two populations, one with mean \(\mu_{1}\) and standard deviation \(\sigma_{1}\), and the other with mean \(\mu_{2}\) and standard deviation \(\sigma_{2}\), where the standard deviations are equal, with value \(\sigma=\sigma_{1}=\sigma_{2}\). The sample sizes \(n_{1}\) and \(n_{2}\) are sufficiently large, say both at least 20, for the sampling distribution of the test statistic under the null hypothesis to be approximately standard normal.
Hypotheses: These are about the difference of the population means +\(\Delta=\mu_{2}-\mu_{1}\), with null hypothesis \(H_{0}: \Delta=0\). +The two-sided alternative hypothesis \(H_{a}: \Delta\ne 0\) is +considered in this example.
The test statistic: the two-sample \(t\)-statistic +\[t=\frac{\hat{\Delta}}{\hat{\sigma}_{\hat{\Delta}}}= +\frac{-1.74}{0.383}=-4.55\] where +\[\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}=6.49-8.23=-1.74\] and +\[\hat{\sigma}_{\hat{\Delta}}= +\hat{\sigma} \; \sqrt{\frac{1}{n_{2}}+\frac{1}{n_{1}}} +=2.210 \times \sqrt{ +\frac{1}{66}+\frac{1}{67}}=0.383\] with \[\hat{\sigma}= +\sqrt{\frac{(n_{2}-1)s^{2}_{2}+(n_{1}-1)s^{2}_{1}}{n_{1}+n_{2}-2}} += +\sqrt{\frac{65\times 2.01^{2}+66\times 2.39^{2}}{131}} +=2.210\]
The sampling distribution of the test statistic when \(H_{0}\) is +true: approximately the standard normal distribution.
The \(P\)-value: the probability that a randomly selected value from +the standard normal distribution is at most \(-4.55\) or at least +4.55, which is about 0.000005 (reported as \(P<0.001\)).
Conclusion: A two-sample \(t\)-test indicates very strong evidence +that the average perceived level of the friendliness of a police +officer is different when the officer is wearing reflective +sunglasses than when the officer is not wearing such glasses +(\(P<0.001\)).
A confidence interval for the mean difference \(\Delta=\mu_{2}-\mu_{1}\) is obtained by substituting appropriate expressions into the general formula ((7.4)). Specifically, here \(\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}\) and a 95% confidence interval for \(\Delta\) is
\[\begin{equation}
(\bar{Y}_{2}-\bar{Y}_{1}) \pm 1.96\; \hat{\sigma} \;\sqrt{\frac{1}{n_{2}}+\frac{1}{n_{1}}}
\tag{7.13}
\end{equation}\]
where \(\hat{\sigma}\) is obtained from equation (7.11). The validity of this again requires that the sample sizes \(n_{1}\) and \(n_{2}\) from both groups are reasonably large, say both at least 20. For the housework Example 7.2, the 95% confidence interval is \[1.16\pm 1.96\times 0.353 = 1.16 \pm 0.69 = (0.47; 1.85)\] using the values of \(\bar{Y}_{2}-\bar{Y}_{1}\) and its standard error calculated earlier. This interval is also shown in Table 7.3 and in the SPSS output in Figure 7.4. In the latter, the interval is given as (-1.85; -0.47) because it is expressed for the difference defined in the opposite direction (men \(-\) women instead of vice versa). For Example 7.3, the 95% confidence interval is \(-1.74\pm 1.96\times 0.383=(-2.49; -0.99)\).
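Both intervals are simple arithmetic, which can be checked in R using the estimates and standard errors given above:

```r
1.16  + c(-1, 1) * 1.96 * 0.353   # Example 7.2: (0.47, 1.85)
-1.74 + c(-1, 1) * 1.96 * 0.383   # Example 7.3: (-2.49, -0.99)
```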
+Based on the data in Example 7.2 we are thus 95 % confident that the +difference between women’s and men’s average hours of reported weekly +housework in the population is between 0.47 and 1.85 hours. In +substantive terms this interval, from just under half an hour to nearly +two hours, is arguably fairly wide in that its two end points might well +be regarded as substantially different from each other. The difference +between women’s and men’s average housework hours is thus estimated +fairly imprecisely from this survey.
+The two-sample \(t\)-test and confidence interval for the difference of +means were stated above under the assumption that the standard +deviations \(\sigma_{1}\) and \(\sigma_{2}\) of the variable of interest \(Y\) +are the same in both of the two groups being compared. This assumption +is not in fact essential. If it is omitted, we obtain formulas which +differ from the ones discussed above only in one part of the +calculations.
+Suppose that we do allow the unknown values of \(\sigma_{1}\) and +\(\sigma_{2}\) to be different from each other. In other words, we +consider the model stated at the beginning of Section 7.3.1, without +assumption 4 that \(\sigma_{1}=\sigma_{2}\). The test statistic is then +still of the same form as before, +i.e. \(t=\hat{\Delta}/\hat{\sigma}_{\hat{\Delta}}\), with +\(\hat{\Delta}=\bar{Y}_{2}-\bar{Y}_{1}\). The only change in the +calculations is that the estimate of the standard error of +\(\hat{\Delta}\), the formula of which is given by equation +((7.8)), now uses separate estimates +of \(\sigma_{1}\) and \(\sigma_{2}\). The obvious choices for these are the +corresponding sample standard deviations, \(s_{1}\) for \(\sigma_{1}\) and +\(s_{2}\) for \(\sigma_{2}\). This gives the estimated standard error as +\[\begin{equation} +\hat{\sigma}_{\hat{\Delta}}=\sqrt{\frac{s_{2}^{2}}{n_{2}}+\frac{s_{1}^{2}}{n_{1}}}. +\tag{7.14} +\end{equation}\] +Substituting this to the formula of the test +statistic yields the two-sample \(t\)-test statistic without the +assumption of equal population standard deviations, +\[\begin{equation} +t=\frac{\bar{Y}_{2}-\bar{Y}_{1}}{\sqrt{s^{2}_{2}/n_{2}+s^{2}_{1}/n_{1}}}. +\tag{7.15} +\end{equation}\] +The sampling distribution of this under the null +hypothesis is again approximately a standard normal distribution when +the sample sizes \(n_{1}\) and \(n_{2}\) are both at least 20. The \(P\)-value +for the test is obtained in exactly the same way as before, and the +principles of interpreting the result of the test are also unchanged.
+For the confidence interval, the only change from Section +7.3.3 is again that the estimated standard error +is changed, so for a 95% confidence interval we use +\[\begin{equation} +(\bar{Y}_{2}-\bar{Y}_{1}) \pm 1.96 \;\sqrt{\frac{s^{2}_{2}}{n_{2}}+\frac{s^{2}_{1}}{n_{1}}}. +\tag{7.16} +\end{equation}\] +In the housework example 7.2, the estimated standard error +((7.14)) is \[\hat{\sigma}_{\hat{\Delta}}= +\sqrt{ +\frac{6.14^{2}}{469}+ +\frac{5.53^{2}}{635} +}= +\sqrt{0.1285}=0.359,\] the value of the test statistic is +\[t=\frac{1.16}{0.359}=3.23,\] and the two-sided \(P\)-value is now +\(P=0.001\). Recall that when the population standard deviations were +assumed to be equal, we obtained \(\hat{\sigma}_{\hat{\Delta}}=0.353\), +\(t=3.29\) and again \(P=0.001\). The two sets of results are thus very +similar, and the conclusions from the test are the same in both cases. +The differences between the two variants of the test are even smaller in +Example 7.3, where the estimated standard error +\(\hat{\sigma}_{\hat{\Delta}}=0.383\) is the same (to three decimal +places) in both cases, and the results are thus identical.33 In both +examples the confidence intervals obtained from ((7.13)) and +((7.16)) are also very similar. Both variants of the two-sample +analyses are shown in SPSS output (c.f. Figure 7.4), the ones assuming equal population standard +deviations on the row labelled “Equal variances assumed” and the one +without this assumption on the “Equal variances not assumed” row.34
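The calculations without the equal-variance assumption are again easy to reproduce from the summary statistics; the sketch below transcribes formulas ((7.14))–((7.15)) for Example 7.2. If the raw data were available, this variant would in fact be R's default, since t.test() uses var.equal = FALSE unless told otherwise.

```r
se_welch <- sqrt(6.14^2 / 469 + 5.53^2 / 635)   # 0.359
t_welch  <- 1.16 / se_welch                     # 3.23
```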
+Which methods should we then use, the ones with or without the +assumption of equal population variances? In practice the choice rarely +makes much difference, and the \(P\)-values and conclusions from the two +versions of the test are typically very similar.35 Not assuming the +variances to be equal has the advantage of making fewer restrictive +assumptions about the population. For this reason it should be used in +the rare cases where the \(P\)-values obtained under the different +assumptions are substantially different. This version of the test +statistic is also slightly easier to calculate by hand, since +((7.14)) is a slightly simpler formula than +((7.10))–((7.11)). On the other hand, the test statistic +which does assume equal standard deviations has the advantage that it is +more closely related to analogous tests used in more general contexts +(especially the method of linear regression modelling, discussed in +Chapter 8). It is also preferable when the sample sizes +are very small, as discussed below.
+As discussed in Section 6.3, it is often assumed +that the population distributions of the variables under consideration +are described by particular probability distributions. In this chapter, +however, such assumptions have so far been avoided. This is a +consequence of the Central Limit Theorem, which ensures that as long as +the sample sizes are large enough, the sampling distribution of the +two-sample \(t\)-test statistic is approximately the standard normal +distribution, irrespective of the forms of the population distributions +of \(Y\) in the two groups. In this section we briefly describe variants +of the test and confidence interval which do assume that the +population distributions are of a particular form, specifically that +they are normal distributions. This changes the sampling distribution +that is used for the test statistic and for the multiplier of the +confidence interval, but the analyses are otherwise unchanged.
For the significance test, there are again two variants depending on the
assumptions about the population standard deviations \(\sigma_{1}\)
and \(\sigma_{2}\). Consider first the case where these are assumed to be
equal. The sampling distribution is then given by the following result,
which now holds for any sample sizes \(n_{1}\) and \(n_{2}\): if the
population distribution of \(Y\) is normal in both groups and the
population standard deviations are equal, then under the null hypothesis
the two-sample \(t\)-test statistic has a \(t\) distribution with
\(n_{1}+n_{2}-2\) degrees of freedom.
+The \(\mathbf{t}\) distributions mentioned in this result are a family +of distributions with different degrees of freedom, in a similar way as +the \(\chi^{2}\) distributions discussed in Section +4.3.4. All \(t\) distributions are symmetric +around 0. Their shape is quite similar to that of the standard normal +distribution, except that the variance of a \(t\) distribution is somewhat +larger and its tails thus heavier. The difference is noticeable only +when the degrees of freedom are small, as seen in Figure +7.5. This shows the curves for the \(t\) distributions with 6 +and 30 degrees of freedom, compared to the standard normal distribution. +It can be seen that the \(t_{30}\) distribution is already very similar to +the \(N(0,1)\) distribution. With degrees of freedom larger than about 30, +the difference becomes almost indistinguishable.
If we use this result for the test, the \(P\)-value is obtained from the
\(t\) distribution with \(n_{1}+n_{2}-2\) degrees of freedom (often denoted
\(t_{n_{1}+n_{2}-2}\)). The principles of doing this are exactly the same as
those described in Section 5.5.3, and can
be graphically illustrated by plots similar to those in Figure
5.1. Precise \(P\)-values are
again obtained using a computer. In fact, \(P\)-values in SPSS output for
the two-sample \(t\)-test (c.f. Figure 7.4) are actually those obtained from the \(t\)
distribution (with the degrees of freedom shown in the column labelled
“df”) rather than the standard normal distribution. Differences between
the two are, however, very small if the sample sizes are even moderately
large, because then the degrees of freedom \(df=n_{1}+n_{2}-2\) are large
enough for the two distributions to be virtually identical. This is the
case, for instance, in both of the examples considered so far in this
chapter, where \(df=1102\) in Example 7.2 and \(df=131\) in Example 7.3.
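As an illustration of this point, the short sketch below (hypothetical code, not from the text) computes the two-sided \(P\)-value for the test statistic of Example 7.2 from both the \(t_{1102}\) distribution and the standard normal distribution; the two agree to several decimal places.

```python
from scipy import stats

t_stat, df = 3.29, 1102   # equal-variance test statistic and df for Example 7.2

p_from_t = 2 * stats.t.sf(abs(t_stat), df)       # two-sided P-value from the t distribution
p_from_norm = 2 * stats.norm.sf(abs(t_stat))     # two-sided P-value from the standard normal

print(p_from_t, p_from_norm)   # both are approximately 0.001
```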
+If precise \(P\)-values from the \(t\) distribution are not available, upper +bounds for them can again be obtained using appropriate tables, in the +same way as in Section 5.5.3. Now, +however, the critical values depend also on the degrees of freedom. +Because of this, introductory text books on statistics typically include +a table of critical values for \(t\) distributions for a selection of +degrees of freedom. A table of this kind is shown in the Appendix at the end of this course pack. Each row of the +table corresponds to a \(t\) distribution with the degrees of freedom +given in the column labelled “df”. As here, such tables typically +include all degrees of freedom between 1 and 30, plus a selection of +larger values, here 40, 60 and 120.
+The last row is labelled “\(\infty\)”, the mathematical symbol for +infinity. This corresponds to the standard normal distribution, as a \(t\) +distribution with infinite degrees of freedom is equal to the standard +normal. The practical implication of this is that the standard normal +distribution is a good enough approximation for any \(t\) distribution +with reasonably large degrees of freedom. The table thus lists +individual degrees of freedom only up to some point, and the last row +will be used for any values larger than this. For degrees of freedom +between two values shown in the table (e.g. 50 when only 40 and 60 are +given), it is best to use the values for the nearest available degrees +of freedom below the required ones (e.g. use 40 for 50). This will +give a “conservative” approximate \(P\)-value which may be slightly larger +than the exact value.
As for the standard normal distribution, the table is used to identify
critical values for different significance levels (c.f. the information
in Table 5.2). For example, if the degrees of freedom are 20,
the critical value for two-sided tests at the significance level 0.05 is
found in the “0.025” column on the row labelled “20”. This is 2.086. In general,
critical values for \(t\) distributions are somewhat larger than
corresponding values for the standard normal distribution, but the
difference between the two is quite small when the degrees of freedom
are reasonably large.
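When a computer is available, such critical values can also be obtained directly rather than from a printed table. A minimal sketch, for illustration only:

```python
from scipy import stats

# Two-sided test at the 5% level: tail probability 0.025 in each tail
print(stats.t.ppf(0.975, df=20))    # 2.086, the "0.025" column for df = 20
print(stats.norm.ppf(0.975))        # 1.960, the corresponding standard normal value
```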
+The \(t\)-test and the \(t\) distribution are among the oldest tools of +statistical inference. They were introduced in 1908 by W. S. Gosset,36 +initially for the one-sample case discussed in Section +7.4. Gosset was working as a chemist at the Guinness +brewery at St. James’ Gate, Dublin. He published his findings under the +pseudonym “Student”, and the distribution is often known as Student’s +\(t\) distribution.
+These results for the sampling distribution hold when the population +standard deviations \(\sigma_{1}\) and \(\sigma_{2}\) are assumed to be +equal. If this assumption is not made, the test statistic is again +calculated using formulas ((7.14)) and ((7.15)). This case is mathematically more difficult than the +previous one, because the sampling distribution of the test statistic +under the null hypothesis is then not exactly a \(t\) distribution even +when the population distributions are normal. One way of dealing with +this complication (which is known as the Behrens–Fisher problem) is to +find a \(t\) distribution which is a good approximation of the true +sampling distribution. The degrees of freedom of this approximating +distribution are given by +\[\begin{equation} +df=\frac{\left(\frac{s^{2}_{1}}{n_{1}}+\frac{s^{2}_{2}}{n_{2}}\right)^{2}}{\left(\frac{s_{1}^{2}}{n_{1}}\right)^{2}\;\left(\frac{1}{n_{1}-1}\right)+\left(\frac{s_{2}^{2}}{n_{2}}\right)^{2}\;\left(\frac{1}{n_{2}-1}\right)}. +\tag{7.17} +\end{equation}\] +This formula, which is known as the +Welch-Satterthwaite approximation, is not particularly interesting or +worth learning in itself. It is presented here purely for completeness, +and to give an idea of how the degrees of freedom given in the SPSS +output are obtained. In Example 7.2 (see Figure 7.4) these degrees of freedom are 945.777, +showing that the approximate degrees of freedom from ((7.17)) +are often not whole numbers. If approximate \(P\)-values are then obtained +from a \(t\)-table, we need to use values for the nearest whole-number +degrees of freedom shown in the table. This problem does not arise if +the calculations are done with a computer.
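Purely for illustration, the approximate degrees of freedom ((7.17)) for Example 7.2 can be computed as follows from the rounded summary statistics quoted earlier; the result is close to, but not exactly, the 945.777 reported by SPSS from the unrounded data.

```python
# Rounded summary statistics for Example 7.2, as quoted earlier in the chapter
n1, s1 = 635, 5.53
n2, s2 = 469, 6.14

v1, v2 = s1**2 / n1, s2**2 / n2

# Welch-Satterthwaite approximation, equation (7.17)
df = (v1 + v2)**2 / (v1**2 / (n1 - 1) + v2**2 / (n2 - 1))
print(df)   # about 946; SPSS reports 945.777 from the unrounded data
```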
+Two sample \(t\)-test statistics (in two variants, under equal and unequal +population standard deviations) have now been defined under two +different sets of assumptions about the population distributions. In +each case, the formula of the test statistic is the same, so the only +difference is in the form of its sampling distribution under the null +hypothesis. If the population distributions of \(Y\) in the two groups are +assumed to be normal, the sampling distribution of the \(t\)-statistic is +a \(t\) distribution with appropriate degrees of freedom. If the sample +sizes are reasonably large, the sampling distribution is approximately +standard normal, whatever the shape of the population distribution. +Which set of assumptions should we then use? The following guidelines +can be used to make the choice:
+The easiest and arguably most common case is the one where both +sample sizes \(n_{1}\) and \(n_{2}\) are large enough (both greater than +20, say) for the standard normal approximation of the sampling +distribution to be reasonably accurate. Because the degrees of +freedom of the appropriate \(t\) distribution are then also large, the +two sampling distributions are very similar, and conclusions from +the test will be similar in either case. It is then purely a matter +of convenience which sampling distribution is used:
+If you use a computer (e.g. SPSS) to carry out the test or you +are (e.g. in an exam) given computer output, use the \(P\)-value +in the output. This will be from the \(t\) distribution.
If you need to calculate the test statistic by hand and thus +need to use tables of critical values to draw the conclusion, +use the critical values for the standard normal distribution +(see Table 5.2).
When the sample sizes are small (e.g. if one or both of them are +less than 20), only the \(t\) distribution can be used, and even then +only if \(Y\) is approximately normally distributed in both groups in +the population. For some variables (say weight or blood pressure) we +might have some confidence that this is the case, perhaps from +previous, larger studies. In other cases the normality of \(Y\) can +only be assessed based on its sample distribution, which of course +is not very informative when the sample is small. In most cases, +some doubt will remain, so the results of a \(t\)-test from small +samples should be treated with caution. An alternative is then to +use nonparametric tests which avoid the assumption of normality, +for example the so-called Wilcoxon–Mann–Whitney test. These, +however, are not covered on this course.
There are also situations where the population distribution of \(Y\)
cannot possibly be normal, so the possibility of referring to a \(t\)
distribution does not arise. One example is the tests on population
proportions that were discussed in Chapter 5. There the only
possibility we discussed was to use the approximate standard normal
sampling distribution, as long as the sample sizes were large enough.
Because the \(t\)-distribution is never relevant there, the test statistic
is conventionally called the \(z\)-test statistic rather than \(t\).
Sometimes the label \(z\) instead of \(t\) is used also for the two-sample
\(t\)-statistics described in this chapter. This does not change the test
itself.
+It is also possible to obtain a confidence interval for \(\Delta\) which +is valid for even very small sample sizes \(n_{1}\) and \(n_{2}\), but only +under the further assumption that the population distribution of \(Y\) in +both groups is normal. This affects only the multiplier of the standard +errors, which is now based on a \(t\) distribution. The appropriate +degrees of freedom are again \(df=n_{1}+n_{2}-2\) when the population +standard deviations are assumed equal, and approximately given by +equation ((7.17)) if not. In this case the multiplier in +((7.4)) may be labelled \(t^{(df)}_{\alpha/2}\) instead of +\(z_{\alpha/2}\) to draw attention to the fact that it comes from a +\(t\)-distribution and depends on the degrees of freedom \(df\) as well as +the significance level \(1-\alpha\).
+Any multiplier \(t_{\alpha/2}^{(df)}\) is obtained from the relevant \(t\) +distribution using exactly the same logic as the one explained for the +normal distribution in the previous section, using a computer or a table +of \(t\) distributions. For example, in the \(t\) table in the Appendix, multipliers for a 95% confidence interval are +the numbers given in the column labelled “0.025”. Suppose, for instance, +that the sample sizes \(n_{1}\) and \(n_{2}\) are both 10 and population +standard deviations are assumed equal, so that \(df=10+10-2=18\). The +table shows that a \(t\)-based 95% confidence interval would then use the +multiplier 2.101. This is somewhat larger than the corresponding +multiplier 1.96 from the normal distribution, and the \(t\)-based interval +is somewhat wider than one based on the normal distribution. The +difference between the two becomes very small when the sample sizes are +even moderately large, because then \(df\) is large and +\(t_{\alpha/2}^{(df)}\) is very close to 1.96.
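The following small sketch (illustrative only) shows how this multiplier shrinks towards the normal-based value 1.96 as the degrees of freedom increase:

```python
from scipy import stats

# Multiplier for a 95% confidence interval for selected degrees of freedom
for df in (18, 30, 60, 120, 1000):
    print(df, round(stats.t.ppf(0.975, df), 3))
# 18 -> 2.101, 30 -> 2.042, 60 -> 2.0, 120 -> 1.98, 1000 -> 1.962
```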
+The choice between confidence intervals based on the normal or a \(t\) +distribution involves the same considerations as for the significance +test. In short, if the sample sizes are not very small, the choice makes +little difference and can be based on convenience. If you are +calculating an interval by hand, a normal-based one is easier to use +because the multiplier (e.g. 1.96 for 95% intervals) does not depend on +the sample sizes. If, instead, a computer is used, it typically gives +confidence intervals for differences of means based on the \(t\) +distribution, so these are easier to use. Finally, if one or both of the +sample sizes are small, only \(t\)-based intervals can safely be used, and +then only if you are confident that the population distributions of \(Y\) +are approximately normal.
+The task considered in this section is inference on the population mean +of a continuous, interval-level variable \(Y\) in a single population. +This is thus analogous to the analysis of a single proportion in +Sections 5.5–5.6, but with a +continuous variable of interest.
We use Example 7.1 on survey data on diet for illustration. We will
consider two variables, daily consumption of portions of fruit and
vegetables, and the percentage of total daily energy intake obtained
from fat and fatty acids. These will be analysed separately, each in
turn in the role of the variable of interest \(Y\). Summary statistics for
the variables are shown in Table 7.4.
| Variable | \(n\) | \(\bar{Y}\) | \(s\) | \(\mu_{0}\) | \(t\) | Two-sided \(P\)-value\(^{*}\) | One-sided \(P\)-value\(^{\dagger}\) | 95% CI for \(\mu\) |
|---|---|---|---|---|---|---|---|---|
| Fruit and vegetable consumption (400g portions) | 1724 | 2.8 | 2.15 | 5 | \(-42.49\) | \(<0.001\) | \(<0.001\) | (2.70; 2.90) |
| Total energy intake from fat (%) | 1724 | 35.3 | 6.11 | 35 | 2.04 | 0.042 | 0.021 | (35.01; 35.59) |

:(#tab:t-ttests1)Summary statistics, \(t\)-tests and confidence intervals for the mean
for the two variables in Example 7.1 (variables from the Diet and
Nutrition Survey). \(n=\)sample size; \(\bar{Y}=\)sample mean; \(s=\)sample standard
deviation; \(\mu_{0}=\)null hypothesis about the population mean; \(t=t\)-test
statistic; \(*\): Alternative hypothesis \(H_{a}: \mu\ne \mu_{0}\); \(\dagger\):
Alternative hypotheses \(H_{a}: \mu<5\) and \(\mu>35\) respectively.
+The setting for the analysis of this section is summarised as a +statistical model for observations of a variable \(Y\) as follows:
+The population distribution of \(Y\) has some unknown mean \(\mu\) and +unknown standard deviation \(\sigma\).
The observations \(Y_{1}, Y_{2}, \dots, Y_{n}\) in the sample are a +random sample from the population.
The observations are statistically independent, as discussed at the beginning of Section 7.3.1.
It is not necessary to assume that the population distribution has a +particular form. However, this is again sometimes assumed to be a normal +distribution, in which case the analyses may be modified in ways +discussed below.
+The only quantity of interest considered here is \(\mu\), the population +mean of \(Y\). In the diet examples this is the mean number of portions of +fruit and vegetables, or mean percentage of energy derived from fat +(both on an average day for an individual) among the members of the +population (which for this survey is British adults aged 19–64).
+Because no separate groups are being compared, questions of interest are +now not about differences between different group means, but about the +value of \(\mu\) itself. The best single estimate (point estimate) of +\(\mu\) is the sample mean \(\bar{Y}\). More information is provided by a +confidence interval which shows which values of \(\mu\) are plausible +given the observed data.
+Significance testing focuses on the question of whether it is plausible +that the true value of \(\mu\) is equal to a particular value \(\mu_{0}\) +specified by the researcher. The specific value of \(\mu_{0}\) to be +tested is suggested by the research questions. For example, we will +consider \(\mu_{0}=5\) for portions of fruit and vegetables and +\(\mu_{0}=35\) for the percentage of energy from fat. These values are +chosen because they correspond to recommendations by the Department of +Health that we should consume at least 5 portions of fruit and +vegetables a day, and that fat should contribute no more than 35% of +total energy intake. The statistical question is thus whether the +average level of consumption in the population is at the recommended +level.
+In this setting, the null hypothesis for a significance test will be of +the form +\[\begin{equation} +H_{0}: \; \mu=\mu_{0}, +\tag{7.18} +\end{equation}\] +i.e. it claims that the unknown population mean \(\mu\) is +equal to the value \(\mu_{0}\) specified by the null hypothesis. This will +be tested against the two-sided alternative hypothesis +\[\begin{equation} +H_{a}: \; \mu\ne \mu_{0} +\tag{7.19} +\end{equation}\] +or one of the one-sided alternative hypotheses +\[\begin{equation} +H_{a}: \mu> \mu_{0} +\tag{7.20} +\end{equation}\] +or +\[\begin{equation} +H_{a}: \mu< \mu_{0}. +\tag{7.21} +\end{equation}\] +For example, we might consider the one-sided alternative hypotheses \(H_{a}:\; \mu<5\) for portions of fruit and vegetables and \(H_{a}:\;\mu>35\) for the percentage of energy from fat. For both of these, the +alternative corresponds to a difference from \(\mu_{0}\) in the unhealthy +direction, i.e. less fruit and vegetables and more fat than are +recommended.
To establish a connection to the general formulas that have been stated
previously, it is again useful to express these hypotheses in terms of
\[\begin{equation}
\Delta=\mu-\mu_{0},
\tag{7.22}
\end{equation}\]
i.e. the difference between the unknown true mean \(\mu\)
and the value \(\mu_{0}\) claimed by the null hypothesis. Because this is
0 if and only if \(\mu\) and \(\mu_{0}\) are equal, the null hypothesis
((7.18)) can also be expressed as
\[\begin{equation}
H_{0}: \; \Delta=0,
\tag{7.23}
\end{equation}\]
and possible alternative hypotheses as
\[\begin{equation}
H_{a}: \Delta\ne0,
\tag{7.24}
\end{equation}\]
\[\begin{equation}
H_{a}: \Delta>0
\tag{7.25}
\end{equation}\]
and
\[\begin{equation}
H_{a}: \Delta< 0,
\tag{7.26}
\end{equation}\]
corresponding to
((7.19)), ((7.20)) and ((7.21)) respectively.
+The general formulas summarised in Section +7.3.1 can again be used, as long as their +details are modified to apply to \(\Delta\) defined as \(\mu-\mu_{0}\). The +resulting formulas are listed briefly below, and then illustrated using +the data from the diet survey:
+The point estimate of the difference \(\Delta=\mu-\mu_{0}\) is +\[\begin{equation} +\hat{\Delta}=\bar{Y}-\mu_{0}. +\tag{7.27} +\end{equation}\]
The standard error of \(\hat{\Delta}\), i.e. the standard deviation of +its sampling distribution, is +\(\sigma_{\hat{\Delta}}=\sigma/\sqrt{n}\) (note that this is equal to +the standard error \(\sigma_{\bar{Y}}\) of the sample mean \(\bar{Y}\) +itself).37 This is estimated by +\[\begin{equation} +\hat{\sigma}_{\hat{\Delta}} = \frac{s}{\sqrt{n}}. +\tag{7.28} +\end{equation}\]
The \(t\)-test statistic for testing the null hypothesis ((7.23)) is +\[\begin{equation} +t=\frac{\hat{\Delta}}{\hat{\sigma}_{\hat{\Delta}}} = \frac{\bar{Y}-\mu_{0}}{s/\sqrt{n}}. +\tag{7.29} +\end{equation}\]
The sampling distribution of the \(t\)-statistic, when the null
hypothesis is true, is approximately a standard normal distribution
when the sample size \(n\) is reasonably large. A common rule of thumb
is that this approximation is adequate when \(n\) is at
least 30.
+\(P\)-values are obtained and the conclusions drawn in the same way as +for two-sample tests, with appropriate modifications to the wording +of the conclusions.
A confidence interval for \(\Delta\), with confidence level \(1-\alpha\) +and based on the approximate normal sampling distribution, is given +by +\[\begin{equation} +\hat{\Delta}\pm z_{\alpha/2}\, \hat{\sigma}_{\hat{\Delta}} = (\bar{Y}-\mu_{0}) \pm z_{\alpha/2} \, \frac{s}{\sqrt{n}} +\tag{7.30} +\end{equation}\] +where \(z_{\alpha/2}\) is the multiplier from the +standard normal distribution for the required significance level +(see Table 5.3), most often 1.96 for +a 95% confidence interval. If an interval based on the \(t\) +distribution is wanted instead, \(z_{\alpha/2}\) is replaced by the +corresponding multiplier \(t_{\alpha/2}^{(n-1)}\) from the +\(t_{n-1}\) distribution.
+Instead of the interval ((7.30)) for the difference +\(\Delta=\mu-\mu_{0}\), it is usually more sensible to report a +confidence interval for \(\mu\) itself. This is given by +\[\begin{equation} +\bar{Y} \pm z_{\alpha/2} \, \frac{s}{\sqrt{n}}, +\tag{7.31} +\end{equation}\] +which is obtained by adding \(\mu_{0}\) to both end +points of ((7.30)).
For the fruit and vegetable variable in the diet example, the mean under
the null hypothesis is the dietary recommendation \(\mu_{0}=5\). The
estimated difference ((7.27)) is \[\hat{\Delta}=2.8-5=-2.2\] and
its estimated standard error ((7.28)) is
\[\hat{\sigma}_{\hat{\Delta}}= \frac{2.15}{\sqrt{1724}} = 0.05178,\] so
the \(t\)-test statistic ((7.29)) is
\[t=\frac{-2.2}{0.05178} = -42.49.\] To obtain the \(P\)-value for the
test, \(t=-42.49\) is referred to the sampling distribution under the null
hypothesis, which can here be taken to be the standard normal
distribution, as the sample size \(n=1724\) is large. If we consider the
two-sided alternative hypothesis \(H_{a}:\; \Delta\ne 0\) (i.e. \(H_{a}:\; \mu\ne5\)), the \(P\)-value is the probability that a randomly selected
value from the standard normal distribution is at most \(-42.49\) or at
least 42.49. This is a very small probability, approximately
\(0.00\cdots019\), with 268 zeroes between the decimal point and the 1.
This is, of course, to all practical purposes zero, and can be reported
as \(P<0.001\). The null hypothesis \(H_{0}:\; \mu=5\) is rejected at any
conventional level of significance. A \(t\)-test for the mean indicates
very strong evidence that the average daily number of portions of fruit
and vegetables consumed by members of the population differs from the
recommended minimum of five.
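These calculations can be reproduced from the summary statistics in Table 7.4 with a few lines of code. The sketch below is illustrative only; the \(P\)-value is taken from the standard normal distribution, whereas SPSS uses the essentially identical \(t_{1723}\) distribution.

```python
from math import sqrt
from scipy import stats

# Summary statistics for fruit and vegetable consumption (Table 7.4)
n, ybar, s, mu0 = 1724, 2.8, 2.15, 5

se = s / sqrt(n)                           # estimated standard error (7.28)
t = (ybar - mu0) / se                      # one-sample t-statistic (7.29)

p_two_sided = 2 * stats.norm.sf(abs(t))    # effectively zero, reported as P < 0.001
ci = (ybar - 1.96 * se, ybar + 1.96 * se)  # 95% confidence interval for mu, (7.31)

print(round(se, 5), round(t, 2), p_two_sided, ci)
# se = 0.05178, t = -42.49, P effectively 0, CI roughly (2.70, 2.90)
```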
If we considered instead the one-sided alternative hypothesis \(H_{a}:\;\Delta<0\) (i.e. \(H_{a}: \; \mu<5\)), the observed sample mean
\(\bar{Y}=2.8<5\) is in the direction of this alternative. The \(P\)-value
is then the two-sided \(P\)-value divided by 2, which is here a small
value reported as \(P<0.001\) again. The null hypothesis \(H_{0}: \; \mu=5\)
(and by implication also the one-sided null hypothesis
\(H_{0}:\; \mu\ge 5\), as discussed at the end of Section 5.5.1) is
thus also rejected in favour of this one-sided alternative, at any
conventional significance level.
+A 95% confidence interval for \(\mu\) is obtained from ((7.31)) as +\[2.8\pm 1.96 \times \frac{2.15}{\sqrt{1724}} +=2.8\pm 1.96 \times 0.05178= +2.8\pm 0.10 = (2.70; 2.90).\] We are thus 95% confident that the average +daily number of portions of fruit and vegetables consumed by members of +the population is between 2.70 and 2.90.
+Figure 7.6 shows how these results for the fruit and +vegetable variable are displayed in SPSS output. The label “portions” +refers to the name given to the variable in the SPSS data file, and +“Test Value = 5” indicates the null hypothesis value \(\mu_{0}\) being +tested. Other parts of the SPSS output correspond to +the information in Table 7.4 in fairly obvious ways, so “N” +indicates the sample size \(n\) (and not a population size, which is +denoted by \(N\) in our notation), “Mean” the sample mean \(\bar{Y}\), +“Std. Deviation” the sample standard deviation \(s\), “Std. Error Mean” +the estimate of the standard error of the mean given by +\(s/\sqrt{n}=2.15/\sqrt{1724}=0.05178\), “Mean Difference” the difference +\(\hat{\Delta}=\bar{Y}-\mu_{0}=2.8-5=-2.2\), and “t” the \(t\)-test +statistic ((7.29)). The \(P\)-value against the two-sided alternative +hypothesis is shown as “Sig. (2-tailed)” (reported in the somewhat +sloppy SPSS manner as “.000”). This is actually obtained from the \(t\) +distribution, the degrees of freedom of which (\(n-1=1723\)) are given +under “df”. Finally, the output also contains a 95% confidence interval +for the difference \(\Delta=\mu-\mu_{0}\), i.e. the interval +((7.30)).38 This is given as \((-2.30; -2.10)\). To obtain the more +convenient confidence interval ((7.31)) for \(\mu\) itself, we only +need to add \(\mu_{0}=5\) to both end points of the interval shown by +SPSS, to obtain \((-2.30+5; -2.10+5)=(2.70; 2.90)\) as before.
Similar results for the variable on the percentage of dietary energy
obtained from fat are also shown in Table 7.4. Here
\(\mu_{0}=35\), \(\hat{\Delta}=35.3-35=0.3\),
\(\hat{\sigma}_{\hat{\Delta}}=6.11/\sqrt{1724}=0.147\), \(t=0.3/0.147=2.04\), and
the two-sided \(P\)-value is \(P=0.042\). Here \(P<0.05\), so the null hypothesis
that the population average of the percentage of energy obtained from
fat is 35 is rejected at the 5% level of significance. However, because
\(P>0.01\), the hypothesis would not be rejected at the next conventional
significance level of 1%. The conclusions are the same if we considered
the one-sided alternative hypothesis \(H_{a}:\; \mu>35\), for which
\(P=0.042/2=0.021\) (as the observed sample mean \(\bar{Y}=35.3\) is in the
direction of \(H_{a}\)). In this case the evidence against the null
hypothesis is thus somewhat less strong than for the fruit and vegetable
variable, for which the \(P\)-value was extremely small. The 95%
confidence interval for the population average of the fat variable is
\(35.3\pm 1.96\times 0.147=(35.01; 35.59)\).
+Analysis of a single population mean is a good illustration of some of +the advantages of confidence intervals over significance tests. First, a +confidence interval provides a summary of all the plausible values of +\(\mu\) even when, as is very often the case, there is no obvious single +value \(\mu_{0}\) to be considered as the null hypothesis of the +one-sample \(t\)-test. Second, even when such a significance test is +sensible, the conclusion can also be obtained from the confidence +interval, as discussed at the end of Section 5.6.4. In other words, +\(H_{0}:\; \mu=\mu_{0}\) is rejected at a given significance level against a +two-sided alternative hypothesis, if the confidence interval for \(\mu\) +at the corresponding confidence level does not contain \(\mu_{0}\), and +not rejected if the interval contains \(\mu_{0}\). Here the 95% confidence +interval (2.70; 2.90) does not contain 5 for the fruit and vegetable +variable, and the interval (35.01; 35.59) does not contain 35 for the +fat variable, so the null hypotheses with these values as \(\mu_{0}\) are +rejected at the 5% level of significance.
+The width of a confidence interval also gives information on how precise +the results of the statistical analysis are. Here the intervals seem +quite narrow for both variables, in that it seems that their end points +(e.g. 2.7 and 2.9 for portions of fruit and vegetables) would imply +qualitatively similar conclusions about the level of consumption in the +population. Analysis of the sample of 1724 respondents in the National +Diet and Nutrition Survey thus appears to have given us quite precise +information on the population averages for most practical purposes. Of +course, what is precise enough ultimately depends on what those purposes +are. If much higher precision was required, the sample size in the +survey would have to be correspondingly larger.
+Finally, in cases where a null hypothesis is rejected by a significance +test, a confidence interval has the additional advantage of providing a +way to assess whether the observed deviation from the null hypothesis +seems large in some substantive sense. For example, the confidence +interval for the fat variable draws attention to the fact that the +evidence against a population mean of 35 is not very strong. The lower +bound of the interval is only 0.01 units above 35, which is very little +relative to the overall width (about 0.60) of the interval. The +\(P\)-value (0.041) of the test, which is not much below the reference +level of 0.05, also suggests this, but in a less obvious way. Even the +upper limit (35.59) of the interval is arguably not very far from 35, so +it suggests that we can be fairly confident that the population mean +does not differ from 35 by very much in the substantive sense. This +contrasts with the results for the fruit and vegetable variable, where +all the values covered by the confidence interval (2.70; 2.90) are much +more obviously far from the recommended value of 5.
+In the two-sample cases considered in Section 7.3, +the two groups being compared consisted of separate and presumably +unrelated units (people, in all of these cases). It thus seemed +justified to treat the groups as statistically independent. The third +and last general case considered in this chapter is one where this +assumption cannot be made, because there are some obvious connections +between the groups. Examples 7.4 and 7.5 illustrate this situation. Specifically, in +both cases we can find for each observation in one group a natural +pair in the other group. In Example 7.4, the data consist of +observations of a variable for a group of fathers at two time points, so +the pairs of observations are clearly formed by the two measurements for +each father. In Example 7.5 the basic observations are for separate +days, but these are paired (matched) in that for each Friday the 13th +in one group, the preceding Friday the 6th is included in the other. In +both cases the existence of the pairings implies that we must treat the +two groups as statistically dependent.
+Data with dependent samples are quite common, largely because they are +often very informative. Principles of good research design suggest that +one key condition for being able to make valid and powerful comparisons +between two groups is that the groups should be as similar as possible, +apart from differing in the characteristic being considered. Dependent +samples represent an attempt to achieve this through intelligent data +collection. In Example 7.4, the comparison of interest is between a +man’s sense of well-being before and after the birth of his first child. +It is likely that there are also other factors which affect well-being, +such as personality and life circumstances unrelated to the birth of a +child. Here, however, we can compare the well-being for the same men +before and after the birth, which should mean that many of those other +characteristics remain approximately unchanged between the two +measurements. Information on the effects of the birth of a child will +then mostly come not from overall levels of well-being but changes in +it for each man.
+In Example 7.5, time of the year and day of the week are likely to have +a very strong effect on traffic levels. Comparing, say, Friday, November +13th to Friday, July 6th, let alone to Sunday, November 15th, would thus +not provide much information about possible additional differences which +were due specifically to a Friday being the 13th. To keep these other +characteristics approximately constant and thus to focus on the effects +of Friday the 13th, each such Friday has here been matched with the +nearest preceding Friday. With this design, data on just ten matched +pairs will (as seen below) allow us to conclude that the differences are +statistically significant.
+Generalisations of the research designs illustrated by Examples 7.4 and +7.5 allow for measurements at more than two occasions for each subject +(so-called longitudinal or panel studies) and groups of more than two +matched units (clustered designs). Most of these are analysed using +statistical methods which are beyond the scope of this course. The +paired case is an exception, for which the analysis is in fact easier +than for two independent samples. This is because the pairing of +observations allows us to reduce the analysis into a one-sample problem, +simply by considering within-pair differences in the response variable +\(Y\). Only the case where \(Y\) is a continuous variable is considered +here. There are also methods of inference for comparing two (or more) +dependent samples of response variables of other types, but they are not +covered here.
+The quantity of interest is again a population difference. This time it +can be formulated as \(\Delta=\mu_{2}-\mu_{1}\), where \(\mu_{1}\) is the +mean of \(Y\) for the first group (e.g. the first time point in Example +7.4) and \(\mu_{2}\) its mean for the second group. Methods of inference +for \(\Delta\) will again be obtained using the same general results which +were previously applied to one-sample analyses and comparisons of two +independent samples. The easiest way to do this is now to consider a new +variable \(D\), defined for each pair \(i\) as \(D_{i}=Y_{2i}-Y_{1i}\), +where \(Y_{1i}\) denotes the value of the first measurement of \(Y\) for +pair \(i\), and \(Y_{2i}\) is the second measurement of \(Y\) for the same +pair. In Example 7.4 this is thus the difference between a man’s +well-being after the birth of his first baby, and the same man’s +well-being before the birth. In Example 7.5, \(D\) is the difference in +traffic flows on a stretch of motorway between a Friday the 13th and the +Friday a week earlier (these values are shown in the last column of +Table 7.2). The number of observations of \(D\) is the number of +pairs, which is equal to the sample sizes \(n_{1}\) and \(n_{2}\) in each of +the two groups (the case where one of the two measurements might be +missing for some pairs is not considered here). We will denote it by +\(n\).
+The population mean of the differences \(D\) is also +\(\Delta=\mu_{2}-\mu_{1}\), so the observed values \(D_{i}\) can be used for +inference on \(\Delta\). An estimate of \(\Delta\) is the sample average of +\(D_{i}\), +i.e. +\[\begin{equation} +\hat{\Delta}=\overline{D}=\frac{1}{n}\sum_{i=1}^{n} D_{i}. +\tag{7.32} +\end{equation}\] +In other words, this is the average of the +within-pair differences between the two measurements of \(Y\). Its +standard error is estimated by +\[\begin{equation} +\hat{\sigma}_{\hat{\Delta}} = \frac{s_{D}}{\sqrt{n}} +\tag{7.33} +\end{equation}\] +where \(s_{D}\) is the sample standard deviation of +\(D\), i.e. +\[\begin{equation} +s_{D} = \sqrt{\frac{\sum (D_{i}-\overline{D})^{2}}{n-1}}. +\tag{7.34} +\end{equation}\] +A test statistic for the null hypothesis +\(H_{0}: \Delta=0\) is given by +\[\begin{equation} +t=\frac{\hat{\Delta}}{\hat{\sigma}_{\hat{\Delta}}}=\frac{\overline{D}}{s_{D}/\sqrt{n}} +\tag{7.35} +\end{equation}\] +and its \(P\)-value is obtained either from the standard +normal distribution or the \(t_{n-1}\) distribution. A confidence interval +for \(\Delta\) with confidence level \(1-\alpha\) is given by +\[\begin{equation} +\hat{\Delta} \pm q_{\alpha/2} \times \hat{\sigma}_{\hat{\Delta}}=\overline{D} \pm q_{\alpha/2} \times \frac{s_{D}}{\sqrt{n}} +\tag{7.36} +\end{equation}\] +where the multiplier \(q_{\alpha/2}\) is either +\(z_{\alpha/2}\) or \(t_{\alpha/2}^{(n-1)}\). These formulas are obtained by +noting that this is simply a one-sample analysis with the differences +\(D\) in place of the variable \(Y\), and applying the formulas of Section +7.4 to the observed values of \(D\).
| | \(\hat{\Delta}\) | \(\hat{\sigma}_{\hat{\Delta}}\) | \(t\) | \(P\)-value | 95% C.I. for \(\Delta\) |
|---|---|---|---|---|---|
| Example 7.4: Father’s personal well-being | 0.08 | 0.247 | 0.324 | 0.75\(^{\dagger}\) | (-0.40; 0.56) |
| Example 7.5: Traffic flows on successive Fridays | -1835 | 372 | -4.93 | 0.001\(^{*}\) | (-2676; -994) |

:(#tab:t-2tests-dep)Results of tests and confidence intervals for comparing means of two
dependent samples. For Example 7.4, the difference is between after
and before the birth of the child, and for Example 7.5 it is between
Friday the 13th and the preceding Friday the 6th. The \(t\)-statistic and
\(P\)-value refer to the test of \(H_{0}: \Delta=0\). See the text for the
definitions of the statistics. (* Obtained from the \(t_{9}\) distribution;
\(\dagger\) Obtained from the standard normal distribution.)
Results for Examples 7.4 and 7.5 are shown in Table 7.5.
To illustrate the calculations, consider Example 7.5. The \(n=10\) values
of \(D_{i}\) for it are shown in Table 7.2, and the summary
statistics \(\overline{D}=-1835\) and \(s_{D}=1176\) in Table
7.1. The standard error of \(\overline{D}\) is thus
\(s_{D}/\sqrt{n}=1176/\sqrt{10}=372\) and the value of the test statistic
((7.35)) is
\[t=\frac{-1835}{1176/\sqrt{10}}=\frac{-1835}{372}=-4.93.\] This example
differs from others we have considered so far in that the sample size of
\(n=10\) is clearly too small for us to rely on large-sample results. It
is thus not appropriate to refer the test statistic to a standard normal
distribution. Instead, \(P\)-values can be obtained from a \(t\)
distribution, but only if the population distribution of \(D\) itself can
be assumed to be approximately normal. Here we have only the ten
observed values of \(D\) to use for a rather informal assessment of
whether this assumption appears to be reasonable. One value of \(D\) is
smaller than -4000, and 2, 5, 2 of them are in the ranges -3000 to
-2001, -2000 to -1001, and -1000 to -1 respectively. Apart from the
smallest observation, the sample distribution of \(D\) is thus at least
approximately symmetric. While this definitely does not prove that \(D\)
is normally distributed, it is at least not obviously inconsistent with
such a claim. We thus feel moderately confident that we can apply here
tests and confidence intervals based on the \(t\) distribution.
The \(P\)-value, obtained from a \(t\) distribution with \(n-1=9\) degrees of
freedom, for the test statistic \(-4.93\) is approximately 0.001. Even
with only ten pairs of observations, there is significant evidence that
the volume of traffic on a Friday the 13th differs from that of the
preceding Friday. A confidence interval for the difference is obtained
from ((7.36)) as \[-1835 \pm 2.26 \times 372 = (-2676; -994)\]
where the multiplier 2.26 is the quantity
\(t_{\alpha/2}^{(n-1)}=t_{0.025}^{(9)}\), obtained from a computer or a
table of the \(t_{9}\)-distribution. The interval shows that we are 95%
confident that the average reduction in traffic on Friday the 13th on
the stretches of motorway considered here is between 994 and 2676
vehicles. This seems like a substantial systematic difference, although
not particularly large as a proportion of the total volume of traffic on
those roads. In the absence of other information we are tempted to
associate the reduction with some people avoiding driving on a day they
consider to be unlucky.
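The same calculations can be reproduced from the summary statistics \(\overline{D}=-1835\) and \(s_{D}=1176\). The sketch below is illustrative only and uses the \(t_{9}\) distribution throughout.

```python
from math import sqrt
from scipy import stats

# Summary statistics of the differences D for Example 7.5 (from Table 7.1)
n, dbar, sd = 10, -1835, 1176

se = sd / sqrt(n)                   # estimated standard error (7.33)
t = dbar / se                       # paired t-statistic (7.35)
df = n - 1

p_two_sided = 2 * stats.t.sf(abs(t), df)
mult = stats.t.ppf(0.975, df)       # the multiplier 2.26 used in the text
ci = (dbar - mult * se, dbar + mult * se)

print(round(se), round(t, 2), round(p_two_sided, 3), ci)
# se = 372, t = -4.93, P = 0.001, CI roughly (-2676, -994)
```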
+In Example 7.4 the \(P\)-value is 0.75, so we cannot reject the null +hypothesis that \(\Delta=0\). There is thus no evidence that there was a +difference in first-time fathers’ self-assessed level of well-being +between the time their wives were six months pregnant, and a month after +the birth of the baby. This is also indicated by the 95% confidence +interval for the difference, which clearly covers +the value 0 of no difference.
Some further aspects of significance testing are discussed here. These
are not practical issues that need to be actively considered every time
you carry out a test. Instead, they provide context and motivation for
the principles behind significance tests.
+Consider for the moment the approach to significance testing where the +outcome is presented in the form of a discrete claim or decision about +the hypotheses, stating that the null hypothesis was either rejected or +not rejected. This claim can either be correct or incorrect, depending +on whether the null hypothesis is true in the population. There are four +possibilities, summarized in Table 7.6. Two of these are +correct decisions and two are incorrect. The two kinds of incorrect +decisions are traditionally called
+Type I error: rejecting the null hypothesis when it is true
Type II error: not rejecting the null hypothesis when it is +false
The terms are unmemorably bland, but they do at least suggest an order +of importance. Type I error is conventionally considered more serious +than Type II, so what we most want to avoid is rejecting the null +hypothesis unnecessarily. This implies that we will maintain the null +hypothesis unless data provide strong enough evidence to justify +rejecting it, a principle which is somewhat analogous to the “keep a +theory until falsified” thinking of Popperian philosophy of science, or +even the “innocent until proven guilty” principle of jurisprudence.
| | | \(H_{0}\) is Not Rejected | \(H_{0}\) is Rejected |
|---|---|---|---|
| \(H_{0}\) is | True | Correct decision | Type I error |
| | False | Type II error | Correct decision |
:(#tab:t-twoerrors)The four possible combinations of the truth of a null hypothesis +\(H_{0}\) in a population and decision about it from a significance +test.
Despite our dislike of Type I errors, we will not try to avoid them
completely. The only way to guarantee that the null hypothesis is never
incorrectly rejected is never to reject it at all, whatever the
evidence. This is not a useful decision rule for empirical research.
Instead, we will decide in advance how high a probability of Type I
error we are willing to tolerate, and then use a test procedure with
that probability. Suppose that we use a 5% level of significance to make
decisions from a test. The null hypothesis is then rejected if the
sample yields a test statistic for which the \(P\)-value is less than
0.05. If the null hypothesis is actually true, such values are, by the
definition of the \(P\)-value, obtained with probability 0.05. Thus the
significance level (\(\alpha\)-level) of a test is the probability of
making a Type I error. If we use a large \(\alpha\)-level (say
\(\alpha=0.10\)), the null hypothesis is rejected relatively easily
(whenever the \(P\)-value is less than 0.10), but the chances of committing a
Type I error are correspondingly high (also 0.10); with a smaller value
like \(\alpha=0.01\), the error probability is lower because \(H_{0}\) is
rejected only when evidence against it is quite strong.
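The claim that the significance level is the probability of a Type I error can also be checked by simulation. In the illustrative sketch below (the sample sizes and the normal population are arbitrary choices, not from the text), both samples are drawn from the same population, so the null hypothesis is true; a test at the 5% level then rejects it in roughly 5% of the simulated samples.

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(451)

n1 = n2 = 50          # arbitrary sample sizes, for illustration only
alpha = 0.05
n_sim = 10_000
rejections = 0

for _ in range(n_sim):
    # Both samples come from the same population, so H0: Delta = 0 is true
    y1 = rng.normal(loc=0.0, scale=1.0, size=n1)
    y2 = rng.normal(loc=0.0, scale=1.0, size=n2)
    t, p = stats.ttest_ind(y2, y1, equal_var=False)   # two-sample t-test
    if p < alpha:
        rejections += 1

print(rejections / n_sim)   # close to 0.05, the nominal Type I error probability
```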
+This description assumes that the true probability of Type I error for a +test is equal to its stated \(\alpha\)-level. This is true when the +assumptions of the test (about the population distribution, sample size +etc.) are satisfied. If the assumptions fail, the true significance +level will differ from the stated one, i.e. the \(P\)-value calculated +from the standard sampling distribution for that particular test will +differ from the true \(P\)-value which would be obtained from the exact +sampling distribution from the population in question. Sometimes the +difference is minor and can be ignored for most practical purposes (the +test is then said to be robust to violations of some of its +assumptions). In many situations, however, using an inappropriate test +may lead to incorrect conclusions: for example, a test which claims that +the \(P\)-value is 0.02 when it is really 0.35 will clearly give a +misleading picture of the strength of evidence against the null +hypothesis. To avoid this, the task of statisticians is to develop valid +(and preferably robust) tests for many different kinds of hypotheses and +data. The task of the empirical researcher is to choose a test which is +appropriate for his or her data.
+In the spirit of regarding Type I errors as the most serious, the worst +kind of incorrect test is one which gives too low a \(P\)-value, +i.e. exaggerates the strength of evidence against the null hypothesis. +Sometimes it is known that this is impossible or unlikely, so that the +\(P\)-value is either correct or too high. The significance test is then +said to be conservative, because its true rate of Type I errors will +be the same or lower than the stated \(\alpha\)-level. A conservative +procedure of statistical inference is regarded as the next best thing to +one which has the correct level of significance. For example, when the +sample size is relatively large, \(P\)-values for all of the tests +discussed in this chapter may be calculated from a standard normal or +from a \(t\) distribution. \(P\)-values from a \(t\) distribution are then +always somewhat larger. This means that using the \(t\) distribution is +(very slightly) conservative when the population distributions are not +normal, so that we can safely use the \(P\)-values from SPSS output of a +\(t\)-test even in that case (this argument does not, however, justify +using the \(t\)-test when \(Y\) is not normally distributed and the sample +size is small, because the sampling distribution of the \(t\)-test +statistic may then be very far from normal).
+After addressing the question of Type I error by selecting an +appropriate test and deciding on the significance level to be used, we +turn our attention to Type II errors. The probability that a +significance test will reject the null hypothesis when it is in fact not +true, i.e. the probability of avoiding a Type II error, is known as +the power of the test. It depends, in particular, on
+The nature of the test. If several valid tests are available for a +particular analysis, we would naturally prefer one which tends to +have the highest power. One aim of theoretical statistics is to +identify the most powerful test procedures for different problems.
The sample size: other things being equal, larger samples mean +higher power.
The true value of the population parameter to be tested, here the +population mean or proportion. The power of any test will be highest +when the true value is very different from the value specified by +the null hypothesis. For example, it will obviously be easier to +detect that a population mean differs from a null value of +\(\mu_{0}=5\) when the true mean is 25 than when it is 5.1.
The population variability of the variable. Since large population +variance translates into large sampling variability and hence high +levels of uncertainty, the power will be low when population +variability is large, and high if the population variability is low.
The last three of these considerations are often used at the design +stage of a study to get an idea of the sample size required for a +certain level of power, or of the power achievable with a given sample +size. Since data collection costs time and money, we would not want to +collect a much larger sample than is required for a level of certainty +sufficient for the purposes of a study. On the other hand, if a +preliminary calculation reveals that the largest sample we can afford +would still be unlikely to give enough information to detect interesting +effects, the study might be best abandoned.
+A power calculation requires the researcher to specify the kinds of +differences from a null hypothesis which are large enough to be of +practical or theoretical interest, so that she or he would want to be +able to detect them with high probability (it must always be accepted +that the power will be lower for smaller differences). For example, +suppose that we are planning a study to compare the effects of two +alternative teaching methods on the performance of students in an +examination where possible scores are between 0 and 100. The null +hypothesis is that average results are the same for students taught with +each method. It is decided that we want enough data to be able to reject +this with high probability if the true difference \(\Delta\) of the +average exam scores between the two groups is larger than 5 points, +i.e. \(\Delta<-5\) or \(\Delta>5\). The power calculation might then answer +questions like
+What is the smallest sample size for which the probability of +rejecting \(H_{0}: \Delta=0\) is at least 0.9, when the true value of +\(\Delta\) is smaller than \(-5\) or larger than 5?
The largest sample sizes we can afford are 1000 in both groups, +i.e. \(n_{1}=n_{2}=1000\). What is the probability this gives us of +rejecting \(H_{0}: \Delta=0\) when the true value of \(\Delta\) is +smaller than \(-5\) or larger than 5?
To answer these questions, we would also need a rough guess of the +population standard deviations \(\sigma_{1}\) and \(\sigma_{2}\), perhaps +obtained from previous studies. Such calculations employ further +mathematical results for test statistics, essentially using their +sampling distributions under specific alternative hypotheses. The +details are, however, beyond the scope of this course.
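Although the details are beyond the scope of this course, the following rough sketch indicates how such questions can be approached using the normal approximation. The standard deviation of 15 exam points in each group is an assumed value chosen purely for illustration; with different assumptions the answers would change.

```python
from math import sqrt, ceil
from scipy import stats

# Illustrative only: the standard deviation of 15 exam points per group is an
# assumed value, not taken from the text.
sigma1 = sigma2 = 15.0
delta = 5.0              # smallest true difference we want to be able to detect
alpha = 0.05
z_a = stats.norm.ppf(1 - alpha / 2)    # 1.96

def power(n_per_group):
    """Approximate power of the two-sided two-sample test when the true
    difference equals delta (normal approximation, ignoring the far tail)."""
    se = sqrt(sigma1**2 / n_per_group + sigma2**2 / n_per_group)
    return stats.norm.sf(z_a - abs(delta) / se)

# Power with n1 = n2 = 1000 (the second question above)
print(round(power(1000), 3))           # essentially 1.0

# Smallest n per group giving power of at least 0.9 (the first question above)
z_b = stats.norm.ppf(0.9)
n_required = ceil((sigma1**2 + sigma2**2) * (z_a + z_b)**2 / delta**2)
print(n_required)                      # about 190 per group under these assumptions
```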
The \(P\)-value is a measure of the strength of evidence the data provide
against the null hypothesis. This is not the same as the magnitude of
the difference between sample estimates and the null hypothesis, or the
practical importance of such differences. As noted above, the power of a
test increases with increasing sample size. One implication of this is
that when \(n\) is large, even quite small observed deviations from the
values that correspond exactly to the null hypothesis will be judged to
be statistically significant. Consider, for example, the two dietary
variables in Table 7.4. The
sample mean of the fat variable is 35.3, which is significantly
different (at the 5% level of significance) from \(\mu_{0}\) of 35. It is
possible, however, that a difference of 0.3 might be considered
unimportant in practice. In contrast, the sample mean of the fruit and
vegetable variable is 2.8, and the difference from \(\mu_{0}\) of 5 seems
not only strongly significant but also large for most practical
purposes.
+In contrast to the large-sample case, in small samples even quite large +apparent deviations from the null hypothesis might still result in a +large \(P\)-value. For example, in a very small study a sample mean of the +fat variable of, say, 30 or even 50 might not be interpreted as +sufficiently strong evidence against a population mean of 35. This is +obviously related to the discussion of statistical power in the previous +section, in that it illustrates what happens when the sample is too +small to provide enough information for useful conclusions.
+In these and all other cases, decisions about what is or is not of +practical importance are subject-matter questions rather than +statistical ones, and would have to be based on information about the +nature and implications of the variables in question. In our dietary +examples this would involve at least medical considerations, and perhaps +also financial implications of the public health costs of the observed +situation or of possible efforts of trying to change it.
+ +Conducted for the Food Standards Agency and the Department of +Health by ONS and MRC Human Nutrition Research. The sample +statistics used here are from the survey reports published by HMSO +in 2002-04, aggregating results published separately for men and +women. The standard errors have been adjusted for non-constant +sampling probabilities using design factors published in the survey +reports. We will treat these numbers as if they were from a simple +random sample.↩
The data were obtained from the UK Data Archive. Three respondents +with outlying values of the housework variable (two women and one +man, with 50, 50 and 70 reported weekly hours) have been omitted +from the analysis considered here.↩
Boyanowsky, E. O. and Griffiths, C. T. (1982). “Weapons and eye +contact as instigators or inhibitors of aggressive arousal in +police-citizen interaction”. Journal of Applied Social Psychology, +12, 398–407.↩
Miller, B. C. and Sollie, D. L. (1980). “Normal stresses during +the transition to parenthood”. Family Relations, 29, 459–465. +See the article for further information, including results for the +mothers.↩
Scanlon, T. J. et al. (1993). “Is Friday the 13th bad for your
health?”. British Medical Journal, 307, 1584–1586. The data
were obtained from The Data and Story Library at Carnegie Mellon
University (lib.stat.cmu.edu/DASL).↩
In this case this is a consequence of the fact that the sample
sizes (67 and 66) in the two groups are very similar. When they are
exactly equal, formulas ((7.11))–((7.12)) and
((7.14)) actually give exactly the same value for the
standard error \(\hat{\sigma}_{\hat{\Delta}}\), and \(t\) is thus also
the same for both variants of the test.↩
The output also shows, under “Levene’s test”, a test statistic and +\(P\)-value for testing the hypothesis of equal standard deviations +(\(H_{0}: \, \sigma_{1}=\sigma_{2}\)). However, we prefer not to rely on this +because the test requires the additional assumption that the +population distributions are normal, and is very sensitive to the +correctness of this assumption.↩
In the MY464 examination and homework, for example, both variants +of the test are equally acceptable, unless a question explicitly +states otherwise.↩
Student (1908). “The probable error of a mean”. Biometrika +6, 1–25.↩
The two are the same because \(\mu_{0}\) in
\(\hat{\Delta}=\bar{Y}-\mu_{0}\) is a known number rather than a
data-dependent statistic, which means that it does not affect the
standard error.↩
Except that SPSS uses the multiplier from \(t_{1723}\) distribution +rather than the normal distribution. This makes no difference here, +as the former is 1.961 and the latter 1.960.↩
You will no doubt be pleased to learn that the topics covered on this +course have not quite exhausted the list of available statistical +methods. In this chapter we outline some of the most important further +areas of statistics, so that you are at least aware of their existence +and titles. For some of them, codes of LSE courses which cover these +methods are given in parentheses.
+A very large part of advanced statistics is devoted to further types of +regression models. The basic idea of them is the same as for +multiple linear regression, i.e. modelling expected values of response +variables given several explanatory variables. The issues involved in +the form and treatment of explanatory variables are usually almost +exactly the same as for linear models. Different classes of regression +models are needed mainly to accommodate different types of response +variables:
Models for categorical response variables. These exist for
situations where the response variable is dichotomous (binary
regression, especially logistic models), has more than two
unordered (multinomial logistic models) or ordered (ordinal
regression models) categories, or is a count, for example in a
contingency table (Poisson regression, loglinear models).
Despite the many different titles, all of these models are closely
connected (MY452).
Models for cases where the response is a length of time to some +event, such as a spell of unemployment, interval between births of +children or survival of a patient in a medical study. These +techniques are known as event history analysis, survival +analysis or lifetime data analysis. Despite the different +terms, all refer to the same statistical models.
Techniques for the analysis of dependent data, which do not require +the assumption of statistically independent observations used by almost +all the methods on this course:
+Time series analysis for one or more long sequence of +observations of the same quantity over time. For example, each of +the five temperature sequencies in Figure 8.2 is a +time series of this kind.
Regression models for hierarchical data, where some sets of +observations are not independent of each other. There are two main +types of such data: longitudinal or panel data which consist +of short time series for many units (e.g. answers by respondents in +successive waves of a panel survey), and nested or multilevel +data where basic units are grouped in natural groups or clusters +(e.g. pupils in classes and schools in an educational study). Both +of these can be analysed using the same general classes of models, +which in turn are generalisations of linear and other regression +models used for independent data (ST416 for models for multilevel +data and ST442 for models for longitudinal data).
Methods for multivariate data. Roughly speaking, this means data with several variables for comparable quantities treated on an equal footing, so that none of them is obviously a response to the others. For example, results for the ten events in the decathlon data of the week 7 computer class or, more seriously, the responses to a series of related attitude items in a survey are multivariate data of this kind.

Various methods of descriptive multivariate analysis for jointly summarising and presenting information on the many variables, e.g. cluster analysis, multidimensional scaling and principal component analysis (MY455 for principal components analysis).

Model-based methods for multivariate data. These are typically latent variable models, which also involve variables which can never be directly observed. The simplest latent variable technique is exploratory factor analysis, and others include confirmatory factor analysis, structural equation models, and latent trait and latent class models (MY455).

Some types of research design may also involve particular statistical considerations:

Sampling theory for the design of probability samples, e.g. for surveys (part of MY456, which also covers methodology of surveys in general).

Design of experiments for more complex randomized experiments.

Finally, some areas of statistics are concerned with broader and more fundamental aspects of statistical analysis, such as alternative forms of model specification and inference (e.g. nonparametric methods) or the basic ideas of inference itself (e.g. Bayesian statistics). These and the more specific tools further build on the foundations of all statistical methods, which are the subject of probability theory and mathematical statistics. However, you are welcome, if you wish, to leave the details of these fields to professional statisticians, if only to keep them too in employment.
Explanation of the “Table of standard normal tail probabilities” in Section @ref(s_disttables_Z):

The table shows, for values of \(Z\) between 0 and 3.5, the probability that a value from the standard normal distribution is larger than \(Z\) (i.e. the “right-hand” tail probabilities).

For negative values of \(Z\), the probability of values smaller than \(Z\) (the “left-hand” tail probability) is equal to the right-hand tail probability for the corresponding positive value of \(Z\).

| \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. | \(z\) | Prob. |
|------|--------|------|--------|------|--------|------|--------|------|--------|------|--------|
| 0.00 | 0.5000 | 0.50 | 0.3085 | 1.00 | 0.1587 | 1.50 | 0.0668 | 2.00 | 0.0228 | 2.50 | 0.0062 |
| 0.01 | 0.4960 | 0.51 | 0.3050 | 1.01 | 0.1562 | 1.51 | 0.0655 | 2.01 | 0.0222 | 2.52 | 0.0059 |
| 0.02 | 0.4920 | 0.52 | 0.3015 | 1.02 | 0.1539 | 1.52 | 0.0643 | 2.02 | 0.0217 | 2.54 | 0.0055 |
| 0.03 | 0.4880 | 0.53 | 0.2981 | 1.03 | 0.1515 | 1.53 | 0.0630 | 2.03 | 0.0212 | 2.56 | 0.0052 |
| 0.04 | 0.4840 | 0.54 | 0.2946 | 1.04 | 0.1492 | 1.54 | 0.0618 | 2.04 | 0.0207 | 2.58 | 0.0049 |
| 0.05 | 0.4801 | 0.55 | 0.2912 | 1.05 | 0.1469 | 1.55 | 0.0606 | 2.05 | 0.0202 | 2.60 | 0.0047 |
| 0.06 | 0.4761 | 0.56 | 0.2877 | 1.06 | 0.1446 | 1.56 | 0.0594 | 2.06 | 0.0197 | 2.62 | 0.0044 |
| 0.07 | 0.4721 | 0.57 | 0.2843 | 1.07 | 0.1423 | 1.57 | 0.0582 | 2.07 | 0.0192 | 2.64 | 0.0041 |
| 0.08 | 0.4681 | 0.58 | 0.2810 | 1.08 | 0.1401 | 1.58 | 0.0571 | 2.08 | 0.0188 | 2.66 | 0.0039 |
| 0.09 | 0.4641 | 0.59 | 0.2776 | 1.09 | 0.1379 | 1.59 | 0.0559 | 2.09 | 0.0183 | 2.68 | 0.0037 |
| 0.10 | 0.4602 | 0.60 | 0.2743 | 1.10 | 0.1357 | 1.60 | 0.0548 | 2.10 | 0.0179 | 2.70 | 0.0035 |
| 0.11 | 0.4562 | 0.61 | 0.2709 | 1.11 | 0.1335 | 1.61 | 0.0537 | 2.11 | 0.0174 | 2.72 | 0.0033 |
| 0.12 | 0.4522 | 0.62 | 0.2676 | 1.12 | 0.1314 | 1.62 | 0.0526 | 2.12 | 0.0170 | 2.74 | 0.0031 |
| 0.13 | 0.4483 | 0.63 | 0.2643 | 1.13 | 0.1292 | 1.63 | 0.0516 | 2.13 | 0.0166 | 2.76 | 0.0029 |
| 0.14 | 0.4443 | 0.64 | 0.2611 | 1.14 | 0.1271 | 1.64 | 0.0505 | 2.14 | 0.0162 | 2.78 | 0.0027 |
| 0.15 | 0.4404 | 0.65 | 0.2578 | 1.15 | 0.1251 | 1.65 | 0.0495 | 2.15 | 0.0158 | 2.80 | 0.0026 |
| 0.16 | 0.4364 | 0.66 | 0.2546 | 1.16 | 0.1230 | 1.66 | 0.0485 | 2.16 | 0.0154 | 2.82 | 0.0024 |
| 0.17 | 0.4325 | 0.67 | 0.2514 | 1.17 | 0.1210 | 1.67 | 0.0475 | 2.17 | 0.0150 | 2.84 | 0.0023 |
| 0.18 | 0.4286 | 0.68 | 0.2483 | 1.18 | 0.1190 | 1.68 | 0.0465 | 2.18 | 0.0146 | 2.86 | 0.0021 |
| 0.19 | 0.4247 | 0.69 | 0.2451 | 1.19 | 0.1170 | 1.69 | 0.0455 | 2.19 | 0.0143 | 2.88 | 0.0020 |
| 0.20 | 0.4207 | 0.70 | 0.2420 | 1.20 | 0.1151 | 1.70 | 0.0446 | 2.20 | 0.0139 | 2.90 | 0.0019 |
| 0.21 | 0.4168 | 0.71 | 0.2389 | 1.21 | 0.1131 | 1.71 | 0.0436 | 2.21 | 0.0136 | 2.92 | 0.0018 |
| 0.22 | 0.4129 | 0.72 | 0.2358 | 1.22 | 0.1112 | 1.72 | 0.0427 | 2.22 | 0.0132 | 2.94 | 0.0016 |
| 0.23 | 0.4090 | 0.73 | 0.2327 | 1.23 | 0.1093 | 1.73 | 0.0418 | 2.23 | 0.0129 | 2.96 | 0.0015 |
| 0.24 | 0.4052 | 0.74 | 0.2296 | 1.24 | 0.1075 | 1.74 | 0.0409 | 2.24 | 0.0125 | 2.98 | 0.0014 |
| 0.25 | 0.4013 | 0.75 | 0.2266 | 1.25 | 0.1056 | 1.75 | 0.0401 | 2.25 | 0.0122 | 3.00 | 0.0013 |
| 0.26 | 0.3974 | 0.76 | 0.2236 | 1.26 | 0.1038 | 1.76 | 0.0392 | 2.26 | 0.0119 | 3.02 | 0.0013 |
| 0.27 | 0.3936 | 0.77 | 0.2206 | 1.27 | 0.1020 | 1.77 | 0.0384 | 2.27 | 0.0116 | 3.04 | 0.0012 |
| 0.28 | 0.3897 | 0.78 | 0.2177 | 1.28 | 0.1003 | 1.78 | 0.0375 | 2.28 | 0.0113 | 3.06 | 0.0011 |
| 0.29 | 0.3859 | 0.79 | 0.2148 | 1.29 | 0.0985 | 1.79 | 0.0367 | 2.29 | 0.0110 | 3.08 | 0.0010 |
| 0.30 | 0.3821 | 0.80 | 0.2119 | 1.30 | 0.0968 | 1.80 | 0.0359 | 2.30 | 0.0107 | 3.10 | 0.0010 |
| 0.31 | 0.3783 | 0.81 | 0.2090 | 1.31 | 0.0951 | 1.81 | 0.0351 | 2.31 | 0.0104 | 3.12 | 0.0009 |
| 0.32 | 0.3745 | 0.82 | 0.2061 | 1.32 | 0.0934 | 1.82 | 0.0344 | 2.32 | 0.0102 | 3.14 | 0.0008 |
| 0.33 | 0.3707 | 0.83 | 0.2033 | 1.33 | 0.0918 | 1.83 | 0.0336 | 2.33 | 0.0099 | 3.16 | 0.0008 |
| 0.34 | 0.3669 | 0.84 | 0.2005 | 1.34 | 0.0901 | 1.84 | 0.0329 | 2.34 | 0.0096 | 3.18 | 0.0007 |
| 0.35 | 0.3632 | 0.85 | 0.1977 | 1.35 | 0.0885 | 1.85 | 0.0322 | 2.35 | 0.0094 | 3.20 | 0.0007 |
| 0.36 | 0.3594 | 0.86 | 0.1949 | 1.36 | 0.0869 | 1.86 | 0.0314 | 2.36 | 0.0091 | 3.22 | 0.0006 |
| 0.37 | 0.3557 | 0.87 | 0.1922 | 1.37 | 0.0853 | 1.87 | 0.0307 | 2.37 | 0.0089 | 3.24 | 0.0006 |
| 0.38 | 0.3520 | 0.88 | 0.1894 | 1.38 | 0.0838 | 1.88 | 0.0301 | 2.38 | 0.0087 | 3.26 | 0.0006 |
| 0.39 | 0.3483 | 0.89 | 0.1867 | 1.39 | 0.0823 | 1.89 | 0.0294 | 2.39 | 0.0084 | 3.28 | 0.0005 |
| 0.40 | 0.3446 | 0.90 | 0.1841 | 1.40 | 0.0808 | 1.90 | 0.0287 | 2.40 | 0.0082 | 3.30 | 0.0005 |
| 0.41 | 0.3409 | 0.91 | 0.1814 | 1.41 | 0.0793 | 1.91 | 0.0281 | 2.41 | 0.0080 | 3.32 | 0.0005 |
| 0.42 | 0.3372 | 0.92 | 0.1788 | 1.42 | 0.0778 | 1.92 | 0.0274 | 2.42 | 0.0078 | 3.34 | 0.0004 |
| 0.43 | 0.3336 | 0.93 | 0.1762 | 1.43 | 0.0764 | 1.93 | 0.0268 | 2.43 | 0.0075 | 3.36 | 0.0004 |
| 0.44 | 0.3300 | 0.94 | 0.1736 | 1.44 | 0.0749 | 1.94 | 0.0262 | 2.44 | 0.0073 | 3.38 | 0.0004 |
| 0.45 | 0.3264 | 0.95 | 0.1711 | 1.45 | 0.0735 | 1.95 | 0.0256 | 2.45 | 0.0071 | 3.40 | 0.0003 |
| 0.46 | 0.3228 | 0.96 | 0.1685 | 1.46 | 0.0721 | 1.96 | 0.0250 | 2.46 | 0.0069 | 3.42 | 0.0003 |
| 0.47 | 0.3192 | 0.97 | 0.1660 | 1.47 | 0.0708 | 1.97 | 0.0244 | 2.47 | 0.0068 | 3.44 | 0.0003 |
| 0.48 | 0.3156 | 0.98 | 0.1635 | 1.48 | 0.0694 | 1.98 | 0.0239 | 2.48 | 0.0066 | 3.46 | 0.0003 |
| 0.49 | 0.3121 | 0.99 | 0.1611 | 1.49 | 0.0681 | 1.99 | 0.0233 | 2.49 | 0.0064 | 3.48 | 0.0003 |
| df | 0.100 | 0.050 | 0.025 | 0.010 | 0.005 | 0.001 | 0.0005 |
|----|-------|-------|--------|--------|--------|---------|---------|
| 1 | 3.078 | 6.314 | 12.706 | 31.821 | 63.657 | 318.309 | 636.619 |
| 2 | 1.886 | 2.920 | 4.303 | 6.965 | 9.925 | 22.327 | 31.599 |
| 3 | 1.638 | 2.353 | 3.182 | 4.541 | 5.841 | 10.215 | 12.924 |
| 4 | 1.533 | 2.132 | 2.776 | 3.747 | 4.604 | 7.173 | 8.610 |
| 5 | 1.476 | 2.015 | 2.571 | 3.365 | 4.032 | 5.893 | 6.869 |
| 6 | 1.440 | 1.943 | 2.447 | 3.143 | 3.707 | 5.208 | 5.959 |
| 7 | 1.415 | 1.895 | 2.365 | 2.998 | 3.499 | 4.785 | 5.408 |
| 8 | 1.397 | 1.860 | 2.306 | 2.896 | 3.355 | 4.501 | 5.041 |
| 9 | 1.383 | 1.833 | 2.262 | 2.821 | 3.250 | 4.297 | 4.781 |
| 10 | 1.372 | 1.812 | 2.228 | 2.764 | 3.169 | 4.144 | 4.587 |
| 11 | 1.363 | 1.796 | 2.201 | 2.718 | 3.106 | 4.025 | 4.437 |
| 12 | 1.356 | 1.782 | 2.179 | 2.681 | 3.055 | 3.930 | 4.318 |
| 13 | 1.350 | 1.771 | 2.160 | 2.650 | 3.012 | 3.852 | 4.221 |
| 14 | 1.345 | 1.761 | 2.145 | 2.624 | 2.977 | 3.787 | 4.140 |
| 15 | 1.341 | 1.753 | 2.131 | 2.602 | 2.947 | 3.733 | 4.073 |
| 16 | 1.337 | 1.746 | 2.120 | 2.583 | 2.921 | 3.686 | 4.015 |
| 17 | 1.333 | 1.740 | 2.110 | 2.567 | 2.898 | 3.646 | 3.965 |
| 18 | 1.330 | 1.734 | 2.101 | 2.552 | 2.878 | 3.610 | 3.922 |
| 19 | 1.328 | 1.729 | 2.093 | 2.539 | 2.861 | 3.579 | 3.883 |
| 20 | 1.325 | 1.725 | 2.086 | 2.528 | 2.845 | 3.552 | 3.850 |
| 21 | 1.323 | 1.721 | 2.080 | 2.518 | 2.831 | 3.527 | 3.819 |
| 22 | 1.321 | 1.717 | 2.074 | 2.508 | 2.819 | 3.505 | 3.792 |
| 23 | 1.319 | 1.714 | 2.069 | 2.500 | 2.807 | 3.485 | 3.768 |
| 24 | 1.318 | 1.711 | 2.064 | 2.492 | 2.797 | 3.467 | 3.745 |
| 25 | 1.316 | 1.708 | 2.060 | 2.485 | 2.787 | 3.450 | 3.725 |
| 26 | 1.315 | 1.706 | 2.056 | 2.479 | 2.779 | 3.435 | 3.707 |
| 27 | 1.314 | 1.703 | 2.052 | 2.473 | 2.771 | 3.421 | 3.690 |
| 28 | 1.313 | 1.701 | 2.048 | 2.467 | 2.763 | 3.408 | 3.674 |
| 29 | 1.311 | 1.699 | 2.045 | 2.462 | 2.756 | 3.396 | 3.659 |
| 30 | 1.310 | 1.697 | 2.042 | 2.457 | 2.750 | 3.385 | 3.646 |
| 40 | 1.303 | 1.684 | 2.021 | 2.423 | 2.704 | 3.307 | 3.551 |
| 60 | 1.296 | 1.671 | 2.000 | 2.390 | 2.660 | 3.232 | 3.460 |
| 120 | 1.289 | 1.658 | 1.980 | 2.358 | 2.617 | 3.160 | 3.373 |
| \(\infty\) | 1.282 | 1.645 | 1.960 | 2.326 | 2.576 | 3.090 | 3.291 |

Explanation: As an example, consider the value 3.078 in the top left corner. This indicates that for a \(t\)-distribution with 1 degree of freedom the probability of values greater than 3.078 is 0.100. The last row shows critical values for the standard normal distribution.
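If you want to check entries of this table with software, the following minimal Python sketch (Python is not part of the course software, which is SPSS, and the sketch assumes the scipy package is available) reproduces a few of the critical values from the right-hand tail probability and the degrees of freedom.

```python
from scipy.stats import norm, t

# A critical value is the point with the given right-hand tail probability.
print(round(t.ppf(1 - 0.100, df=1), 3))   # 3.078, the top left entry
print(round(t.ppf(1 - 0.025, df=20), 3))  # 2.086
print(round(norm.ppf(1 - 0.025), 3))      # 1.96, the last row (standard normal)
```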
| df | 0.100 | 0.050 | 0.010 | 0.001 |
|----|-------|-------|-------|--------|
| 1 | 2.71 | 3.84 | 6.63 | 10.828 |
| 2 | 4.61 | 5.99 | 9.21 | 13.816 |
| 3 | 6.25 | 7.81 | 11.34 | 16.266 |
| 4 | 7.78 | 9.49 | 13.28 | 18.467 |
| 5 | 9.24 | 11.07 | 15.09 | 20.515 |
| 6 | 10.64 | 12.59 | 16.81 | 22.458 |
| 7 | 12.02 | 14.07 | 18.48 | 24.322 |
| 8 | 13.36 | 15.51 | 20.09 | 26.124 |
| 9 | 14.68 | 16.92 | 21.67 | 27.877 |
| 10 | 15.99 | 18.31 | 23.21 | 29.588 |
| 11 | 17.28 | 19.68 | 24.72 | 31.264 |
| 12 | 18.55 | 21.03 | 26.22 | 32.909 |
| 13 | 19.81 | 22.36 | 27.69 | 34.528 |
| 14 | 21.06 | 23.68 | 29.14 | 36.123 |
| 15 | 22.31 | 25.00 | 30.58 | 37.697 |
| 16 | 23.54 | 26.30 | 32.00 | 39.252 |
| 17 | 24.77 | 27.59 | 33.41 | 40.790 |
| 18 | 25.99 | 28.87 | 34.81 | 42.312 |
| 19 | 27.20 | 30.14 | 36.19 | 43.820 |
| 20 | 28.41 | 31.41 | 37.57 | 45.315 |
| 25 | 34.38 | 37.65 | 44.31 | 52.620 |
| 30 | 40.26 | 43.77 | 50.89 | 59.703 |
| 40 | 51.81 | 55.76 | 63.69 | 73.402 |
| 50 | 63.17 | 67.50 | 76.15 | 86.661 |
| 60 | 74.40 | 79.08 | 88.38 | 99.607 |
| 70 | 85.53 | 90.53 | 100.43 | 112.317 |
| 80 | 96.58 | 101.88 | 112.33 | 124.839 |
| 90 | 107.57 | 113.15 | 124.12 | 137.208 |
| 100 | 118.50 | 124.34 | 135.81 | 149.449 |

Explanation: For example, the value 2.71 in the top left corner indicates that for a \(\chi^{2}\) distribution with 1 degree of freedom the probability of values greater than 2.71 is 0.100.
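Similarly, the following short Python sketch (again only an illustration, assuming scipy is available) checks entries of the \(\chi^{2}\) table by computing a right-hand tail probability and a critical value.

```python
from scipy.stats import chi2

print(round(chi2.sf(2.71, df=1), 3))        # right-hand tail probability, about 0.100
print(round(chi2.ppf(1 - 0.050, df=4), 2))  # critical value 9.49 for df = 4
```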
+ +In this chapter we still consider statistical analyses which involve +only discrete, categorical variables. In fact, we now focus on the +simplest case of all, that of dichotomous (binary) variables which +have only two possible values. Four examples which will be used for +illustration throughout this chapter are introduced in Section +5.2. In the first two of them we consider a binary +variable in a single population, while in the last two examples the +question of interest involves a comparison of the distributions of the +variable between two populations (groups).
+The data for such analyses can be summarised in simple tables, the +one-group case with a one-way table of two cells, and the two-group case +with a \(2\times 2\) contingency table. Here, however, we formulate the +questions of interest slightly differently, with primary emphasis on the +probability of one of the two values of the variable of interest. In +the one-group case the questions of interest are then about the +population value of a single probability, and in the two-group case +about the comparison of the values of this probability between the two +groups.
+While we describe specific methods of inference for these cases, we also +use them to introduce some further general elements of statistical +inference:
+Population parameters of probability distributions.
Point estimation of population parameters.
Hypotheses about the parameters, and significance tests for them.
Confidence intervals for population parameters.
The comparisons in the two-group analyses again address questions about +associations, now between the group and the dichotomous variable of +interest. Here it will be useful to employ the terminology introduced in +Section 1.2.4, which distinguishes between the +explanatory variable and the response variable in the +association. Following a common convention, we will denote the +explanatory variable by \(X\) and the response variable by \(Y\). In the +two-group cases of this chapter, \(X\) will be the group (which is itself +also binary) and \(Y\) the binary variable whose probabilities we are +interested in. We will use \(Y\) to denote this binary variable also in +the one-group examples.
+The following four examples will be discussed in this chapter. Examples +5.1 and 5.2 concern only one group, while in Examples 5.3 and 5.4 two +groups are to be compared. Table 5.1 shows basic sample +statistics for the examples, together with the results of the +significance tests and confidence intervals described later.
+Example 5.1: An EU referendum
+A referendum about joining the European Union was held in Finland on the +16th of October, 1994. Suppose that in an opinion poll conducted around +October 4th (just before the beginning of postal voting), 330 of the +\(n=702\) respondents (47.0%) indicated that they would definitely vote +Yes to joining the EU, 190 (27.1%) said they would definitely vote No, +and 182 (25.9%) were still undecided.18 Here we will consider the +dichotomous variable with the values of Yes (330 respondents) versus No +or Undecided (372 respondents, or 53.0%). The proportion of voters who +definitely intend to vote Yes provides a lower bound for the proportion +of Yes-votes in the referendum, even if all of the currently undecided +voters eventually decided to vote No.
+Example 5.2: Evidence of possible racial bias in jury selection
As part of an official inquiry into the extent of racial and gender bias in the justice system in the U.S. state of Pennsylvania, an investigation was made of whether people from minority racial groups were underrepresented in trial juries.19 One part of the assessment was a survey administered to all those called for the jury panel for criminal trials (from which the juries for actual trials will be selected) in Allegheny County, Pennsylvania (the city of Pittsburgh and its surrounding areas) between May and October, 2001. We will consider the dichotomous variable of whether a respondent to the survey identified his or her own race as Black (African American) or some other race category. Of the \(n=4950\) respondents, 226 (4.57%) identified themselves as black. This will be compared to the percentage of black people in the whole population of people aged 18 and over (those eligible for jury service) in the county, which is 12.4% (this is a census estimate which will here be treated as a known population quantity, ignoring any possible census error in it).
One sample

Example 5.1: Voting intention in an EU referendum

| \(n\) | Yes | \(\hat{\pi}\) | \(\pi_{0}\) | \(z\) | \(P\) | 95% CI |
|------|-----|-------|-------|-------|-------|--------|
| 702 | 330 | 0.470 | 0.5 | \(-1.59\) | 0.112 | (0.433, 0.507) |

Example 5.2: Race of members of jury panel

| \(n\) | Black | \(\hat{\pi}\) | \(\pi_{0}\) | \(z\) | \(P\) | 95% CI |
|------|-------|--------|-------|--------|---------|--------|
| 4950 | 226 | 0.0457 | 0.124 | \(-16.71\) | \(<0.001\) | (0.040, 0.052) |

Two independent samples

Example 5.3: Polio diagnoses in a vaccine trial

| | \(n\) | Yes | \(\hat{\pi}\) | Diff. (\(\hat{\Delta}\)) | \(z\) | \(P\) | 95% CI |
|---|------|-----|--------|--------|-------|---------|--------|
| Control group (placebo) | 201,229 | 142 | 0.000706 | | | | |
| Treatment group (vaccine) | 200,745 | 57 | 0.000284 | \(-0.000422\) | \(-6.01\) | \(<0.001\) | \((-0.000560, -0.000284)\) |

Example 5.4: Optimistic about young people’s future

| | \(n\) | Yes | \(\hat{\pi}\) | Diff. (\(\hat{\Delta}\)) | \(z\) | \(P\) | 95% CI |
|---|------|-----|-------|-------|------|---------|--------|
| Negative question | 921 | 257 | 0.279 | | | | |
| Positive question | 929 | 338 | 0.364 | 0.085 | 3.92 | \(<0.001\) | (0.043, 0.127) |
Example 5.3: The Salk polio vaccine field trial of 1954
+The first large-scale field trials of the “killed virus” polio +vaccination developed by Dr. Jonas Salk were carried out in the U.S. in +1954.20 In the randomized, double-blind placebo-control part of the +trial, a sample of schoolchildren were randomly assigned to receive +either three injections of the polio vaccine, or three injections of a +placebo, inert saltwater which was externally indistinguishable from the +real vaccine. The explanatory variable \(X\) is thus the group (vaccine or +“treatment” group vs. placebo or “control” group). The response variable +\(Y\) is whether the child was diagnosed with polio during the trial +period (yes or no). There were \(n_{1}=201,229\) children in the control +group, and 142 of them were diagnosed with polio; in the treatment +group, there were 57 new polio cases among \(n_{2}=200,745\) children (in +both cases only those children who received all three injections are +included here). The proportions of cases of polio were thus \(0.000706\) +in the control group and \(0.000284\) in the vaccinated group (i.e. 7.06 +and 2.84 cases per 10,000 subjects, respectively).
+Example 5.4: Split-ballot experiment on acquiescence bias
+Survey questions often ask whether respondents agree or disagree with +given statements on opinions or attitudes. Acquiescence bias means the +tendency of respondents to agree with such statements, regardless of +their contents. If it is present, we will overestimate the proportion of +people holding the opinion corresponding to agreement with the +statement. The data used in this example come from a study which +examined acquiescence bias through a randomized experiment.21 In a +survey carried out in Kazakhstan, the respondents were presented with a +number of attitude statements, with four response categories: “Fully +agree”, “Somewhat agree”, “Somewhat disagree”, and “Fully disagree”. +Here we combine the first two and the last two, and consider the +resulting dichotomous variable, with values labelled “Agree” and +“Disagree”.
+We consider one item from the survey, concerning the respondents’ +opinions on the expectations of today’s young people. There were two +forms of the question:
+“A young person today can expect little of the future”
“A young person today can expect much of the future”
We will call these the “Negative” and “Positive” question respectively. +Around half of the respondents were randomly assigned to receive the +positive question, and the rest got the negative question. The +explanatory variable \(X\) indicates the type of question, with Negative +and Positive questions coded here as 1 and 2 respectively. The +dichotomous response variable \(Y\) is whether the respondent gave a +response which was optimistic about the future (i.e. agreed with the +positive or disagreed with the negative question) or a pessimistic +response. The sample sizes and proportions of optimistic responses in +the two groups are reported in Table 5.1. The proportion is +higher when the question was worded positively, as we would expect if +there was acquiescence bias. Whether this difference is statistically +significant remains to be determined.
+The response variables \(Y\) considered in this section have only two +possible values. It is common to code them as 0 and 1. In our examples, +we will define the values of the variable of interest as follows:
+Example 5.1: 1 if a person says that he or she will definitely vote +Yes, and 0 if the respondent will vote No or is undecided
Example 5.2: 1 for black respondents and 0 for all others
Example 5.3: 1 if a child developed polio, 0 if not
Example 5.4: 1 if the respondent gave an optimistic response, 0 if +not
The population distribution of such a variable is completely specified +by one number, the probability that a randomly selected member of +the population will have the value \(Y=1\) rather than 0. It can also be +thought of as the proportion of units in the population with \(Y=1\); +we will use the two terms interchangeably. This probability is denoted +here \(\pi\) (the lower-case Greek letter “pi”).22 The value of \(\pi\) is +between 0 (no-one in the population has \(Y=1\)) and 1 (everyone has +\(Y=1\)). Because \(Y\) can have only two possible values, and the sum of +probabilities must be one, the population probability of \(Y=0\) is +\(1-\pi\).
+The probability distribution which corresponds to this +kind of population distribution is the Binomial distribution. For +later use, we note already here that the mean of this distribution is +\(\pi\) and its variance is \(\pi(1-\pi)\).
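These two values can be verified directly from the definitions of the mean and variance of a variable with the possible values 0 and 1, which occur with probabilities \(1-\pi\) and \(\pi\) respectively:
\[\text{mean} = 0\times(1-\pi)+1\times\pi=\pi\]
and
\[\text{variance} = (0-\pi)^{2}\times(1-\pi)+(1-\pi)^{2}\times\pi=\pi(1-\pi).\]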
+In Example 5.1, the population is that of eligible voters at the time of +the opinion poll, and \(\pi\) is the probability that a randomly selected +eligible voter definitely intended to vote Yes. In Example 5.2, \(\pi\) is +the probability that a black person living in the county will be +selected to the jury panel. In Example 5.3, \(\pi\) is the probability +(possibly different in the vaccinated and unvaccinated groups) that a +child will develop polio, and in Example 5.4 it is the probability +(which possibly depends on how the question was asked) that a respondent +will give an optimistic answer to the survey question.
The probability \(\pi\) is the parameter of the binomial distribution. In general, the parameters of a probability distribution are one or more numbers which fully determine the distribution. For example, in the analyses of Chapter 4 we considered conditional distributions of one variable in a contingency table given the other variable. Although we did not make use of this terminology there, these distributions also have their parameters, which are the probabilities of (all but one of) the categories of the response variable. Another case will be introduced in Chapter 7, where we consider a probability distribution for a continuous variable, and its parameters.
+Questions and hypotheses about population distributions are usually most +conveniently formulated in terms of the parameters of the distributions. +For a binary variable \(Y\), this means that statistical inference will be +focused on the probability \(\pi\).
+The most obvious question about a parameter is “what is our best guess +of the value of the parameter in the population?” The answer will be +based on the information in the sample, using some sample statistic as +the best guess or estimate of the population parameter. +Specifically, this is a point estimate, because it is expressed as a +single value or a “point”, to distinguish it from interval estimates +defined later.
+We denote a point estimate of \(\pi\) by \(\hat{\pi}\). The “\(\; \hat{\;}\;\)” or “hat” is often used to denote an estimate of a parameter +indicated by the symbol under the hat; \(\hat{\pi}\) is read as “pi-hat”. +As \(\pi\) for a binomial distribution is the population proportion of +\(Y=1\), the obvious choice for a point estimate of it is the sample +proportion of units with \(Y=1\). If we denote the number of such units +by \(m\), the proportion is thus \(\hat{\pi}=m/n\), i.e. \(m\) divided by the +sample size \(n\). In Example 5.1, \(m=330\) and \(n=702\), and +\(\hat{\pi}=330/702=0.47\). This and the estimated proportions in the +other examples are shown in Table 5.1, in the two-sample +examples 5.3 and 5.4 separately for the two groups.
+When \(Y\) is coded with values 0 and 1, \(\hat{\pi}\) is also equal to the +sample mean of \(Y\), since +\[\begin{equation} +\bar{Y}=\frac{Y_{1}+Y_{2}+\dots+Y_{n}}{n}=\frac{0+0+\dots+0+\overbrace{1+1+\dots+1}^{m \text{ ones}}}{n}=\frac{m}{n}=\hat{\pi}. +\tag{5.1} +\end{equation}\]
+A null hypothesis about a single population probability \(\pi\) is of the +form +\[\begin{equation} +H_{0}:\; \pi=\pi_{0} +\tag{5.2} +\end{equation}\] +where \(\pi_{0}\) is a given number which is either of +specific interest or in some other sense a suitable benchmark in a given +application. For example, in the voting example 5.1 we could consider +\(\pi_{0}=0.5\), i.e. that the referendum was too close to call. In the +jury example 5.2 the value of interest would be \(\pi_{0}=0.124\), the +proportion of black people in the general adult population of the county.
An alternative but equivalent form of ((5.2)) is expressed in terms of the difference
\[\begin{equation}
\Delta=\pi-\pi_{0}
\tag{5.3}
\end{equation}\]
(\(\Delta\) is the upper-case Greek letter “Delta”). Then ((5.2)) can also be written as
\[\begin{equation}
H_{0}: \; \Delta=0,
\tag{5.4}
\end{equation}\]
i.e. that there is no difference between the true population probability and the hypothesised value \(\pi_{0}\). This version of the notation allows us later to draw attention to the similarities between different analyses in this chapter and in Chapter 7. In all of these cases the quantities of interest turn out to be differences of some kind, and the formulas for test statistics and confidence intervals will be of essentially the same form.
+The alternative hypothesis to the null hypothesis ((5.4)) requires +some further comments, because there are some new possibilities that did +not arise for the \(\chi^{2}\) test of independence in Chapter +4. For the difference \(\Delta\), we may consider two basic +kinds of alternative hypotheses. The first is a two-sided alternative +hypothesis +\[\begin{equation} +H_{a}: \; \Delta\ne 0 +\tag{5.5} +\end{equation}\] +(where “\(\ne\)” means “not equal to”). This claims that +the true value of the population difference \(\Delta\) is some unknown +value which is not 0 as claimed by the null hypothesis. With a +two-sided \(H_{a}\), sample evidence that the true difference differs from +0 will be regarded as evidence against the null hypothesis, irrespective +of whether it suggests that \(\Delta\) is actually smaller or larger than +0 (hence the word “two-sided”). When \(\Delta=\pi-\pi_{0}\), this means +that we are trying to assess whether the true probability \(\pi\) is +different from the claimed value \(\pi_{0}\), but without any expectations +about whether \(\pi\) might be smaller or larger than \(\pi_{0}\).
+The second main possibility is one of the two one-sided alternative +hypotheses +\[\begin{equation} +H_{a}: \Delta> 0 +\tag{5.6} +\end{equation}\] +or +\[\begin{equation} +H_{a}: \Delta < 0 +\tag{5.7} +\end{equation}\] +Such a hypothesis is only interested in +values of \(\Delta\) to one side of 0, either larger or smaller than it. +For example, hypothesis ((5.6)) in the referendum example 5.1, +with \(\pi_{0}=0.5\), is \(H_{a}:\; \pi>0.5\), i.e. that the proportion who +intend to vote Yes is greater than one half. Similarly, in the jury +example 5.2, with \(\pi_{0}=0.124\), ((5.7)) is the hypothesis +\(H_{a}:\; \pi<0.124\), i.e. that the probability that an eligible black +person is selected to a jury panel is smaller than the proportion of +black people in the general population.
+Whether we choose to consider a one-sided or a two-sided alternative +hypothesis depends largely on the research questions. In general, a +one-sided hypothesis would be used when deviations from the null +hypothesis only in one direction would be interesting and/or surprising. +This draws on background information about the variables. A two-sided +alternative hypothesis is neutral in this respect. Partly for this +reason, two-sided hypotheses are in practice used more often than +one-sided ones. Choosing a two-sided alternative hypothesis is not wrong +even when a one-sided one could also be considered; this will simply +lead to a more cautious (conservative) approach in that it takes +stronger evidence to reject the null hypothesis when the alternative is +two-sided than when it is one-sided. Such conservatism is typically +regarded as a desirable feature in statistical inference (this will be +discussed further in Section 7.6.1).
+The two-sided alternative hypothesis +((5.5)) is clearly the logical opposite of the null hypothesis +((5.4)): if \(\Delta\) is not equal to 0, it must be “not equal” to +0. So a two-sided alternative hypothesis must correspond to a “point” +null hypothesis ((5.4)). For a one-sided alternative hypothesis, +the same logic would seem to imply that the null hypothesis should also +be one-sided: for example, \(H_{0}: \; \Delta\le 0\) and +\(H_{a}:\; \Delta>0\) would form such a logical pair. Often such +“one-sided” null hypothesis is also closest to our research questions: +for example, it would seem more interesting to try to test the +hypothesis that the proportion of Yes-voters is less than or equal to +0.5 than that it is exactly 0.5. It turns out, however, that when the +alternative hypothesis is, say, \(H_{a}: \Delta>0\), the test will be the +same when the null hypothesis is \(H_{0}: \; \Delta\le 0\) as when it is +\(H_{0}: \Delta= 0\), and rejecting or not rejecting one of them is +equivalent to rejecting or not rejecting the other. We can thus here +always take the null hypothesis to be technically of the form +((5.4)), even if we are really interested in a corresponding +“one-sided” null hypothesis. It is then only the alternative hypothesis +which is explicitly either two-sided or one-sided.
The test statistic used to test hypotheses of the form ((5.2)) is the z-test statistic
\[\begin{equation}
z=\frac{\hat{\Delta}}{\hat{\sigma}_{\hat{\Delta}}}=\frac{\text{Estimate of the population difference $\Delta$}}{\text{Estimated standard error of the estimate of $\Delta$}}.
\tag{5.8}
\end{equation}\]
The statistic is introduced first in this form in order to draw attention to its generality. Null hypotheses in many ostensibly different situations can be formulated as hypotheses of the form ((5.4)) about population differences of some kind, and each can be tested with the test statistic ((5.8)). For example, all of the test statistics discussed in Chapters 5, 7 and 8 of this course pack will be of this type (but the \(\chi^{2}\) test statistic of Chapter 4 is not). The principles of the use and interpretation of the test that are introduced in this section apply almost unchanged also in these other contexts, and only the exact formulas for calculating \(\hat{\Delta}\) and \(\hat{\sigma}_{\hat{\Delta}}\) will need to be defined separately for each of them. In some applications considered in Chapter 7 the test statistic is typically called the t-test statistic instead of the \(z\)-test statistic, but its basic idea is still the same.
+In ((5.8)), \(\hat{\Delta}\) denotes a sample estimate of +\(\Delta\). For a test of a single proportion, this is +\[\begin{equation} +\hat{\Delta} = \hat{\pi}-\pi_{0}, +\tag{5.9} +\end{equation}\] +i.e. the difference between the sample proportion and +\(\pi_{0}\). This is the core of the test statistic. Although the forms of +the two statistics seem rather different, ((5.9)) contains the +comparison of the observed and expected sample values that was also at +the heart of the \(\chi^{2}\) test statistic (see formula at end of Section 4.3.3) in Chapter +4. Here the “observed value” is the sample estimate +\(\hat{\pi}\) of the probability parameter, “expected value” is the value +\(\pi_{0}\) claimed for it by the null hypothesis, and +\(\hat{\Delta}=\hat{\pi}-\pi_{0}\) is their difference. (Equivalently, we +could also say that the expected value of \(\Delta=\pi-\pi_{0}\) under the +null hypothesis ((5.4)) is 0, its observed value is \(\hat{\Delta}\), +and \(\hat{\Delta}=\hat{\Delta}-0\) is their difference.)
+If the null hypothesis was true, we would expect the observed difference +\(\hat{\Delta}\) to be close to 0. If, on the other hand, the true \(\pi\) +was different from \(\pi_{0}\), we would expect the same to be true of +\(\hat{\pi}\) and thus \(\hat{\Delta}\) to be different from 0. In other +words, the difference \(\hat{\Delta}=\hat{\pi}-\pi_{0}\) tends to be small +(close to zero) when the null hypothesis is true, and large (far from +zero) when it is not true, thus satisfying one of the requirements for a +good test statistic that were stated at the beginning of Section 4.3.4. (Whether +in this we count as “large” both large positive and large negative +values, or just one or the other, depends on the form of the alternative +hypothesis, as explained in the next section.)
+The \(\hat{\sigma}_{\hat{\Delta}}\) in ((5.8)) denotes an +estimate of the standard deviation of the sampling distribution of +\(\hat{\Delta}\), which is also known as the estimated standard error +of \(\hat{\Delta}\). For the test statistic ((5.8)), it is +evaluated under the null hypothesis. The concept of a standard error of +an estimate will be discussed in more detail in Section +6.4. Its role in the test statistic is to provide an +interpretable scale for the size of \(\hat{\Delta}\), so that the sampling +distribution discussed in the next section will be of a convenient form.
For a test of the hypothesis ((5.2)) about a single proportion, the estimated standard error under the null hypothesis is
\[\begin{equation}
\hat{\sigma}_{\hat{\Delta}} = \sqrt{\frac{\pi_{0}(1-\pi_{0})}{n}},
\tag{5.10}
\end{equation}\]
and the specific formula of the test statistic ((5.8)) is then
\[\begin{equation}
z=\frac{\hat{\pi}-\pi_{0}}{\sqrt{\pi_{0}(1-\pi_{0})/n}}.
\tag{5.11}
\end{equation}\]
This is the one-sample \(z\)-test statistic for a population proportion.
+In Example 5.1 we have \(\hat{\pi}=0.47\), \(\pi_{0}=0.5\), and \(n=702\), so +\[z=\frac{\hat{\pi}-\pi_{0}}{\sqrt{\pi_{0}(1-\pi_{0})/n}}= +\frac{0.47-0.50}{\sqrt{0.50\times(1-0.50)/702}}=-1.59.\] Similarly, in +Example 5.2 we have \(\hat{\pi}=0.0457\), \(\pi_{0}=0.124\), \(n=4950\), and +\[z=\frac{0.0457-0.124}{\sqrt{0.124\times(1-0.124)/4950}} += +\frac{-0.0783}{\sqrt{0.10862/4950}}=-16.71.\] Strangely, SPSS does not +provide a direct way of calculating this value. However, since the +formula ((5.11)) is very simple, we can easily calculate it with a +pocket calculator, after first using SPSS to find out \(\hat{\pi}\). This +approach will be used in the computer classes.
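If you want to check such hand calculations another way, here is a minimal sketch in Python of formula ((5.11)) applied to the two examples (Python is not part of the course software, and the function name is just for illustration):

```python
from math import sqrt

def z_one_proportion(pi_hat, pi_0, n):
    """One-sample z-test statistic for a proportion, formula (5.11)."""
    return (pi_hat - pi_0) / sqrt(pi_0 * (1 - pi_0) / n)

print(z_one_proportion(330 / 702, 0.5, 702))      # Example 5.1: about -1.59
print(z_one_proportion(226 / 4950, 0.124, 4950))  # Example 5.2: about -16.7
```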
+Like the \(\chi^{2}\) test of Chapter 4, the +\(z\)-test for a population proportion requires some conditions on the +sample size in order for the approximate sampling distribution of the +test statistic to be appropriate. These depend also on the value of +\(\pi\), which we can estimate by \(\hat{\pi}\). One rule of thumb is that +\(n\) should be larger than 10 divided by \(\pi\) or \(1-\pi\), whichever is +smaller. When \(\pi\) is not very small or very large, e.g. if it is +between 0.3 and 0.7, this essentially amounts to the condition that \(n\) +should be at least 30. In the voting example 5.1, where +\(\hat{\pi}=0.47\), the sample size of \(n=702\) is clearly large enough. In +the jury example 5.2, \(\hat{\pi}=0.0457\) is much closer to zero, but +since \(10/0.0457\) is a little over 200, a sample of \(n=4950\) is again +sufficient.
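The rule of thumb is equally easy to check; a small sketch under the same assumptions (illustrative helper name only):

```python
def sample_size_ok(pi_hat, n):
    """Rule of thumb: n should exceed 10 divided by the smaller of pi and 1 - pi."""
    return n > 10 / min(pi_hat, 1 - pi_hat)

print(sample_size_ok(0.47, 702))     # Example 5.1: True
print(sample_size_ok(0.0457, 4950))  # Example 5.2: True (10/0.0457 is about 219)
```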
+When the sample size is large enough, the sampling distribution of \(z\) +defined by ((5.11)) is approximately the standard normal +distribution. The probability curve of this distribution is shown in +Figure 5.1. For now we just take it as given, and postpone +a general discussion of the normal distribution until Chapter +7.
+The \(P\)-value of the test is calculated from this distribution using the +general principles introduced in Section 4.3.5. +In other words, the \(P\)-value is the probability that the test statistic +\(z\) has a value that is as or more extreme than the value of \(z\) in the +observed sample. Now, however, the details of this calculation depend +also on the alternative hypothesis, so some additional explanation is +needed.
+Consider first the more common case of a two-sided alternative +hypothesis ((5.5)), that \(\Delta\ne 0\). As discussed in the +previous section, it is large values of the test statistic which +indicate evidence against the null hypothesis, because a large \(z\) is +obtained when the sample difference \(\hat{\Delta}=\hat{\pi}-\pi_{0}\) is +very different from the zero difference claimed by the null hypothesis. +When the alternative is two-sided, “large” is taken to mean any value of +\(z\) far from zero, i.e. either large positive or large negative values, +because both indicate that the sample difference is far from 0. If \(z\) +is large and positive, \(\hat{\Delta}\) is much larger than 0. In +example 5.1 this would indicate that a much larger proportion than 0.5 +of the sample say they intend to vote Yes. If \(z\) is large and negative, +\(\hat{\Delta}\) is much smaller than 0, indicating a much smaller +sample proportion than 0.5. Both of these cases would count as evidence +against \(H_{0}\) when the alternative hypothesis is two-sided.
+The observed value of the \(z\)-test statistic in Example 5.1 was actually +\(z=-1.59\). Evidence would thus be “as strong” against \(H_{0}\) as the +observed \(z\) if we obtained a \(z\)-test statistic of \(-1.59\) or 1.59, the +value exactly as far from 0 as the observed \(z\) but above rather than +below 0. Similarly, evidence against the null would be even stronger if +\(z\) was further from zero than 1.59, i.e. larger than 1.59 or smaller +than \(-1.59\). To obtain the \(P\)-value, we thus need to calculate the +probability of observing a \(z\)-test statistic which is at most \(-1.59\) +or at least 1.59 when the null hypothesis is true in the population. In +general, the \(P\)-value for testing the null hypothesis against a +two-sided alternative is the probability of obtaining a value at least +\(z\) or at most \(-z\) (when \(z\) is positive, vice versa when it is +negative), where \(z\) here denotes the value of the test statistic in the +sample. Such probabilities are calculated from the approximately +standard normal sampling distribution of the test statistic under +\(H_{0}\).
+This calculation of the \(P\)-value is illustrated graphically in Figure +5.1. The curve in the plot is that of the standard normal +distribution. Two areas are shown in grey under the curve, one on each +tail of the distribution. The one on the left corresponds to values of +\(-1.59\) and smaller, and the one on the right to values of 1.59 or +larger. Each of these areas is about 0.056, and the \(P\)-value for a test +against a two-sided alternative is their combined area, +i.e. \(P=0.056+0.056=0.112\). This means that even if the true population +proportion of Yes-voters was actually exactly 0.5, there would be a +probability of 0.112 of obtaining a test statistic as or more extreme +than the \(z=-1.59\) that was actually observed in Example 5.1.
In Example 5.2 the observed test statistic was \(z=-16.71\). The two-sided \(P\)-value is then the probability of values that are at most \(-16.71\) or at least 16.71. These areas are not shown in Figure 5.1 because they would not be visible in it. The horizontal axis of the figure runs from \(-4\) to \(+4\), so \(-16.71\) is clearly far in the tail of the distribution and the corresponding probability is very small; we would report it as \(P<0.001\).
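Purely as an illustration outside SPSS, the two-sided \(P\)-values can also be computed directly from the standard normal distribution; the sketch below assumes the scipy package.

```python
from scipy.stats import norm

def two_sided_p(z):
    """Two-sided P-value: total probability beyond |z| in both tails of N(0, 1)."""
    return 2 * norm.sf(abs(z))

print(two_sided_p(-1.59))   # Example 5.1: about 0.112
print(two_sided_p(-16.71))  # Example 5.2: effectively 0, reported as P < 0.001
```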
Consider now the case of a one-sided alternative hypothesis. For example, in the referendum example we might have decided beforehand to focus only on the possibility that the proportion of people who intend to vote Yes is smaller than 0.5, and hence consider the alternative hypothesis that \(\Delta<0\). Two situations might then arise. First, suppose that the observed value of the sample difference is in the direction indicated by the alternative hypothesis. This is the case in the example, where the sample difference \(\hat{\Delta}=-0.03\) is indeed smaller than zero, and the test statistic \(z=-1.59\) is negative. The possible values of \(z\) contributing to the \(P\)-value are then those of \(-1.59\) or smaller. Values of \(1.59\) and larger are now not included, because positive values of the test statistic (corresponding to sample differences greater than 0) would not be regarded as evidence in favour of the claim that \(\Delta\) is smaller than 0. The \(P\)-value is thus only the probability corresponding to the area on the left tail of the curve in Figure 5.1, and the corresponding area on the right tail is not included. Since both areas have the same size, the one-sided \(P\)-value is half the two-sided value, i.e. 0.056 instead of 0.112. In general, the one-sided \(P\)-value for a \(z\)-test of a proportion and other similar tests is always obtained by dividing the two-sided value by 2, if the sample evidence is in the direction of the one-sided alternative hypothesis.

The second case occurs when the sample difference is not in the direction indicated by a one-sided alternative hypothesis. For example, suppose that the sample proportion of Yes-voters had actually been 0.53, i.e. 0.03 larger than 0.5, so that we had obtained \(z=+1.59\) instead. The possible values of the test statistic which contributed to the \(P\)-value would then be \(z=1.59\) and all smaller values. These are “as strong or stronger evidence against the null hypothesis and in the direction of the alternative hypothesis” as required by the definition at the beginning of Section 4.3.5, since they agree with the alternative hypothesis (negative values of \(z\)) or at least disagree with it less than the observed \(z\) (positive values from 0 to 1.59). In Figure 5.1, these values would correspond to the area under the whole curve, apart from the region to the right of \(1.59\) on the right tail. Since the probability of the latter is 0.056 and the total probability under the curve is 1, the required probability is \(P=1-0.056=0.944\). However, calculating the \(P\)-value so precisely is hardly necessary in this case, as it is clearly going to be closer to 1 than to 0. The conclusion from such a large \(P\)-value will always be that the null hypothesis should not be rejected. This is also intuitively obvious, as a sample difference in the opposite direction from the one claimed by the alternative hypothesis is clearly not to be regarded as evidence in favour of that alternative hypothesis. In short, if the sample difference is in a different direction than a one-sided alternative hypothesis, the \(P\)-value can be reported simply as \(P>0.5\) without further calculations.
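The one-sided calculation can be sketched in the same illustrative way (the direction labels are hypothetical names for this sketch, and scipy is again assumed):

```python
from scipy.stats import norm

def one_sided_p(z, direction):
    """One-sided P-value against H_a: Delta < 0 ('less') or H_a: Delta > 0 ('greater')."""
    return norm.cdf(z) if direction == "less" else norm.sf(z)

print(one_sided_p(-1.59, "less"))  # about 0.056, half of the two-sided value
print(one_sided_p(+1.59, "less"))  # about 0.944, sample difference in the 'wrong' direction
```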
+If a statistical software package is used to carry out the test, it will +also report the \(P\)-value and no further calculations are needed (except +dividing a two-sided \(P\)-value by 2, if a one-sided value is needed and +only a two-sided one is reported). However, since SPSS does not +currently provide a procedure for this test, and for exam purposes, we +will briefly outline how an approximate \(P\)-value is obtained using +critical values from a table. This is done in a very similar way as for +the \(\chi^{2}\) test in Section 4.3.5.
+The first part of Table 5.2 shows a table of critical values +for the standard normal distribution. These values are also shown in the Appendix at the end of this course pack, on the +last row of a larger table (the other parts of this table will be +explained later, in Section 7.3.4). A +version of this table is included in all introductory text books on +statistics, although its format may be slightly different in different +books.
Right-hand tail probabilities:

| | 0.100 | 0.050 | 0.025 | 0.01 | 0.005 | 0.001 | 0.0005 |
|---|-------|-------|-------|-------|-------|-------|--------|
| Critical value | 1.282 | 1.645 | 1.960 | 2.326 | 2.576 | 3.090 | 3.291 |

Significance levels:

| Alternative hypothesis | 0.10 | 0.05 | 0.01 | 0.001 |
|------------------------|------|------|------|-------|
| Two-sided | 1.65 | 1.96 | 2.58 | 3.29 |
| One-sided | 1.28 | 1.65 | 2.33 | 3.09 |
The columns of the first part of Table 5.2 are labelled +“Right-hand tail probabilities”, with separate columns for some values +from 0.100 to 0.0005. This means that the probability that a value from +the standard normal distribution is at least as large as the value given +in a particular column is the number given at the top of that column. +For example, the value in the column labelled “0.025” is 1.960, +indicating that the probability of obtaining a value equal to or greater +than 1.960 from the standard normal distribution is 0.025. Because the +distribution is symmetric, the probability of values of at most \(-1.960\) +is also 0.025, and the total probability that a value is at least 1.960 +units from zero is \(0.025+0.025=0.05\).
+These values can be used to obtain bounds for \(P\)-values, expressed in +terms of conventional significance levels of 0.10, 0.05, 0.01 and 0.001. +The values at which these tail probabilities are obtained are the +corresponding critical values for the test statistic. They are shown in +the lower part of Table 5.2, slightly rearranged for clarity +of presentation and rounded to two decimal places (which is accurate +enough for practical purposes). The basic idea of using the critical +values is that if the observed (absolute value of) the \(z\)-test +statistic is larger than a critical value (for the required kind of +alternative hypothesis) shown in the lower part of Table 5.2, +the \(P\)-value is smaller than the significance level corresponding to +that critical value.
+The table shows only positive critical values. If the observed test +statistic is actually negative, its negative (\(-\)) sign is omitted and +the resulting positive value (i.e. the absolute value of the statistic) +is compared to the critical values. Note also that the critical value +for a given significance level depends on whether the alternative +hypothesis is two-sided or one-sided. In the one-sided case, the test +statistic is compared to the critical values only if it is actually in +the direction of the alternative hypothesis; if not, we can simply +report \(P>0.5\) as discussed above.
+The \(P\)-value obtained from the table is reported as being smaller than +the smallest conventional significance level for which the corresponding +critical value is exceeded by the observed test statistic. For instance, +in the jury example 5.2 we have \(z=-16.71\). Considering a two-sided +alternative hypothesis, 16.71 is larger than the critical values 1.65, +1.96, 2.58 and 3.29 for all the standard significance levels, so we can +report that \(P<0.001\). For Example 5.1, in contrast, \(z=-1.59\), the +absolute value of which is smaller than even the critical value 1.65 for +the 10% significance level. For this example, we would report \(P>0.1\).
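This table look-up is mechanical enough to sketch in a few lines of Python (an illustration only; the critical values are copied from Table 5.2):

```python
# Two-sided critical values from Table 5.2, paired with their significance levels.
CRITICAL_TWO_SIDED = [(0.001, 3.29), (0.01, 2.58), (0.05, 1.96), (0.10, 1.65)]

def p_value_bound(z):
    """Bound the two-sided P-value using the critical values of Table 5.2."""
    for level, critical in CRITICAL_TWO_SIDED:
        if abs(z) > critical:
            return f"P < {level}"
    return "P > 0.1"

print(p_value_bound(-16.71))  # Example 5.2: P < 0.001
print(p_value_bound(-1.59))   # Example 5.1: P > 0.1
```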
The intuitive idea of the critical values and their connection to the \(P\)-values is illustrated for Example 5.1 by Figure 5.1. Here the observed test statistic is \(z=-1.59\), so the two-sided \(P\)-value is the probability of values at least 1.59 or at most \(-1.59\), which correspond to the two grey areas in the tails of the distribution. Also shown in the plot is one of the critical values for two-sided tests, the 1.96 for significance level 0.05. By definition of the critical values, the combined tail probability of values at least 1.96 from 0, i.e. the probability of values at least 1.96 or at most \(-1.96\), is 0.05. It is clear from the plot that since 1.59 is smaller than 1.96, these areas are smaller than the tail areas corresponding to 1.59 and \(-1.59\), and the combined area of the latter must be more than 0.05, i.e. it must be that \(P>0.05\). A similar argument for the 10% critical value of 1.65 shows that \(P\) is here also larger than 0.1.
+The general principles of drawing and stating conclusions from a +significance test have already been explained in Section +4.3.6, so they need not be repeated here. +Considering two-sided alternative hypotheses, the conclusions in our two +examples are as follows:
+In the referendum example 5.1, \(P=0.112\) for the null hypothesis +that \(\pi=0.5\) in the population of eligible voters. The null +hypothesis is not rejected at conventional levels of significance. +There is not enough evidence to conclude that the proportion of +voters who definitely intend to vote Yes differs from one half.
In the jury example 5.2, \(P<0.001\) for the null hypothesis that +\(\pi=0.124\). The null hypothesis is thus overwhelmingly rejected at +any conventional level of significance. There is very strong +evidence that the probability of a black person being selected to +the jury pool differs from the proportion of black people in the +population of the county.
As a summary, let us again repeat the main steps of the test described +in this section in a concise form, using the voting variable of Example +5.1 for illustration:
+Data: a sample of size \(n=702\) of a dichotomous variable \(Y\) with +values 1 (Yes) and 0 (No or undecided), with the sample proportion +of ones \(\hat{\pi}=0.47\).
Assumptions: the observations are a random sample from a population +distribution with some population proportion (probability) \(\pi\), +and the sample size \(n\) is large enough for the test to be valid +(for example, \(n\ge 30\) when \(\pi_{0}\) is between about 0.3 and 0.7, +as it is here).
Hypotheses: null hypothesis \(H_{0}: \pi=\pi_{0}\) against the +alternative hypothesis \(H_{a}: \pi\ne \pi_{0}\), where \(\pi_{0}=0.5\).
The test statistic: the \(z\)-statistic +\[z=\frac{\hat{\pi}-\pi_{0}}{\sqrt{\pi_{0}(1-\pi_{0})/n}}= +\frac{0.47-0.50}{\sqrt{0.50\times(1-0.50)/702}}=-1.59.\]
The sampling distribution of the test statistic when \(H_{0}\) is +true: a standard normal distribution.
The \(P\)-value: the probability that a randomly selected value from the standard normal distribution is at most \(-1.59\) or at least 1.59, which is \(P=0.112\).
+Conclusion: The null hypothesis is not rejected (\(P=0.112\)). There +is not enough evidence to conclude that the proportion of eligible +voters who definitely intend to vote Yes differs from one half. +Based on this opinion poll, the referendum remains too close +to call.
A significance test assesses whether it is plausible, given the evidence +in the observed data, that a population parameter or parameters have a +specific set of values claimed by the null hypothesis. For example, in +Section 5.5 we asked such a question about the +probability parameter of a binary variable in a single population.
+In many ways a more natural approach would be try to identify all of +those values of a parameter which are plausible given the data. This +leads to a form of statistical inference known as interval +estimation, which aims to present not only a single best guess (i.e. a +point estimate) of a population parameter, but also a range of plausible +values (an interval estimate) for it. Such an interval is known as a +confidence interval. This section introduces the idea of confidence +intervals, and shows how to construct them for a population probability. +In later sections, the same principles will be used to calculate +confidence intervals for other kinds of population parameters.
+Interval estimation is an often underused part of statistical inference, +while significance testing is arguably overused or at least often +misused. In most contexts it would be useful to report confidence +intervals in addition to, or instead of, results of significance tests. +This is not done often enough in research publications in the social +sciences.
+Our aim is again to draw inference on the difference +\(\Delta=\pi-\pi_{0}\) or, equivalently, the population probability \(\pi\). +The point estimate of \(\Delta\) is \(\hat{\Delta}=\hat{\pi}-\pi_{0}\) where +\(\hat{\pi}\) is the sample proportion corresponding to \(\pi\). Suppose +that the conditions on the sample size \(n\) that were discussed in +Section 5.5.3 are again satisfied.
Consider now Figure @ref(fig:f-pval-prob). One of the results illustrated by it is that if \(\pi_{0}\) is the true value of the population probability \(\pi\), so that \(\Delta=\pi-\pi_{0}=0\), there is a probability of 0.95 that for a randomly drawn sample from the population the \(z\)-test statistic \(z=\hat{\Delta}/\hat{\sigma}_{\hat{\Delta}}\) is between \(-1.96\) and \(+1.96\). This also implies that the probability is 0.95 that in such a sample the observed value of \(\hat{\Delta}\) will be between \(\Delta-1.96\,\hat{\sigma}_{\hat{\Delta}}\) and \(\Delta+1.96\,\hat{\sigma}_{\hat{\Delta}}\). Furthermore, it is clear from the figure that all of the values within this interval are more likely to occur than any of the values outside the interval (i.e. those in the two tails of the sampling distribution). The interval thus seems like a sensible summary of the “most likely” values that the estimate \(\hat{\Delta}\) may have in random samples.
+A confidence interval essentially turns this around, into a statement +about the unknown true value of \(\Delta\) in the population, even in +cases where \(\Delta\) is not 0. This is done by substituting +\(\hat{\Delta}\) for \(\Delta\) above, to create the interval +\[\begin{equation} +\text{from }\hat{\Delta} -1.96\times \hat{\sigma}_{\hat{\Delta}}\text{ to }\hat{\Delta}+1.96\times \hat{\sigma}_{\hat{\Delta}}. +\tag{5.12} +\end{equation}\] +This is the 95 % confidence interval for the +population difference \(\Delta\). It is usually written more concisely as +\[\begin{equation} +\hat{\Delta}\pm 1.96\, \hat{\sigma}_{\hat{\Delta}} +\tag{5.13} +\end{equation}\] +where the “plusminus” symbol \(\pm\) indicates that we +calculate the two endpoints of the interval as in ((5.12)), one +below and one above \(\hat{\Delta}\).
Expression ((5.13)) is general in the sense that many different quantities can take the role of \(\Delta\) in it. Here we consider for now the case of \(\Delta=\pi-\pi_{0}\). The estimated standard error \(\hat{\sigma}_{\hat{\Delta}}\) is analogous to ((5.10)) used for the \(z\)-test, but not the same. This is because the confidence interval is not calculated under the null hypothesis \(H_{0}:\; \pi=\pi_{0}\), so we cannot use \(\pi_{0}\) for \(\pi\) in the standard error. Instead, \(\pi\) is estimated by the sample proportion \(\hat{\pi}\), giving the estimated standard error
\[\begin{equation}
\hat{\sigma}_{\hat{\Delta}} = \sqrt{\frac{\hat{\pi}(1-\hat{\pi})}{n}}
\tag{5.14}
\end{equation}\]
and thus the 95% confidence interval
\[(\hat{\pi}-\pi_{0}) \pm 1.96 \; \sqrt{\frac{\hat{\pi}(1-\hat{\pi})}{n}}\]
for \(\Delta=\pi-\pi_{0}\). Alternatively, a confidence interval for \(\pi\) itself is given by
\[\begin{equation}
\hat{\pi} \pm 1.96 \;\sqrt{\frac{\hat{\pi}(1-\hat{\pi})}{n}}.
\tag{5.15}
\end{equation}\]
This is typically the most useful interval for use in practice. For instance, in the referendum example 5.1 this gives a 95% confidence interval of
\[0.470\pm 1.96\times \sqrt{\frac{0.470\times(1-0.470)}{702}}
=0.470\pm 0.0369=(0.433, 0.507)\]
for the proportion of definite Yes-voters in the population. Similarly, in Example 5.2 the 95% confidence interval for the probability of a black person being selected for the jury pool is (0.040, 0.052). These intervals are also shown in Table 5.1.
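As with the test statistic, interval ((5.15)) is simple enough to compute with a pocket calculator or, purely as an illustrative alternative to SPSS, with a few lines of Python (illustrative function name only):

```python
from math import sqrt

def proportion_ci(pi_hat, n, multiplier=1.96):
    """95% confidence interval (5.15) for a population proportion."""
    se = sqrt(pi_hat * (1 - pi_hat) / n)
    return (pi_hat - multiplier * se, pi_hat + multiplier * se)

print(proportion_ci(330 / 702, 702))    # Example 5.1: about (0.433, 0.507)
print(proportion_ci(226 / 4950, 4950))  # Example 5.2: about (0.040, 0.052)
```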
+As with the \(P\)-value of a significance test, the precise interpretation of a confidence interval refers to probabilities calculated from a sampling distribution, i.e. probabilities evaluated from a hypothetical exercise of repeated sampling: if the collection of the data and the calculation of a 95% confidence interval from them were repeated many times, for different random samples from the same population, then in 95% of those samples the calculated interval would contain the true population value of \(\Delta\), and in 5% of them it would not.
+This is undeniably convoluted, even more so than the precise interpretation of a \(P\)-value. In practice a confidence interval would not usually be described in exactly these words. Instead, a research report might, for example, write that (in the referendum example) “the 95% confidence interval for the proportion of eligible voters in the population who definitely intend to vote Yes is (0.433, 0.507)”, or that “we are 95% confident that the proportion of eligible voters in the population who definitely intend to vote Yes is between 43.3% and 50.7%”. Such a statement in effect assumes that the readers will be familiar enough with the idea of confidence intervals to understand the claim. It is nevertheless useful to be aware of the more careful interpretation of a confidence interval, if only to avoid misunderstandings. The most common error is to claim that “there is a 95% probability that the proportion in the population is between 0.433 and 0.507”. Although the difference from the interpretation given above may seem small, the latter statement is not really true, or strictly speaking even meaningful, in the statistical framework considered here.
+In place of the 1.96 in ((5.13)), we may also use other numbers. To allow for this in the notation, we can also write
\[\begin{equation}
\hat{\Delta} \pm z_{\alpha/2}\; \hat{\sigma}_{\hat{\Delta}}
\tag{5.16}
\end{equation}\]
where the multiplier \(z_{\alpha/2}\) is a number which depends on two things. One of them is the sampling distribution of \(\hat{\Delta}\), which is here assumed to be the normal distribution (another possibility is discussed in Section 7.3.4). The second is the confidence level which we have chosen for the confidence interval. For example, the probability of 0.95 in the interpretation of a 95% confidence interval discussed above is the confidence level of that interval. The 0.95 level is the one most commonly used, while other standard choices are 0.90 and 0.99, i.e. 90% and 99% confidence intervals.
+In the symbol \(z_{\alpha/2}\), \(\alpha\) is a number such that \(1-\alpha\) +equals the required confidence level. In other words, \(\alpha=0.1\), +0.05, and 0.01 for confidence levels of \(1-\alpha=0.90\), 0.95 and 0.99 +respectively. The values that are required for the conventional levels +are \(z_{0.10/2}=z_{0.05}=1.64\), \(z_{0.05/2}=z_{0.025}=1.96\), and +\(z_{0.01/2}=z_{0.005}=2.58\), which correspond to intervals at the +confidence levels of 90%, 95% and 99% respectively. These values are +also shown in Table 5.3.
Confidence level | 90% | 95% | 99% |
---|---|---|---|
Multiplier \(z_{\alpha/2}\) | 1.64 | 1.96 | 2.58 |
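These multipliers are simply quantiles of the standard normal distribution. The following small sketch (in Python, assuming the scipy library is available; not part of the course materials) shows how they can be obtained for any confidence level.

```python
# Sketch: the multipliers z_{alpha/2} in Table 5.3 are standard normal quantiles.
from scipy.stats import norm

for level in (0.90, 0.95, 0.99):
    alpha = 1 - level
    print(level, round(norm.ppf(1 - alpha / 2), 2))
# prints 1.64, 1.96 and 2.58 for the 90%, 95% and 99% levels
```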
A confidence interval contains, loosely speaking, those numbers which +are considered plausible values for the unknown population difference +\(\Delta\) in the light of the evidence in the data. The width of the +interval thus reflects our uncertainty about the exact value of +\(\Delta\), which in turn is related to the amount of information the data +provide about \(\Delta\). If the interval is wide, many values are +consistent with the observed data, so there is still a large amount of +uncertainty; if the interval is narrow, we have much information about +\(\Delta\) and thus little uncertainty. Another way of stating this is +that when the confidence interval is narrow, estimates of \(\Delta\) are +very precise.
+The width of the interval ((5.16)) is \(2\times z_{\alpha/2}\times \hat{\sigma}_{\hat{\Delta}}\). This depends on the following (a small numerical illustration is given after the list):
+The confidence level: the higher the level, the wider the interval. Thus a 99% confidence interval is always wider than a 95% interval for the same data, and wider still than a 90% interval. This is logically inevitable: if we want to state with a high level of confidence that a parameter is within a certain interval, we must allow the interval to contain a wide range of values. It also explains why we do not consider a 100% confidence interval: this would contain all possible values of \(\Delta\) and exclude none, making no use of the data at all. Instead, we aim for a high but not perfect level of confidence, obtaining an interval which contains some but not all possible values, for the price of a small chance of incorrect conclusions.
The standard error \(\hat{\sigma}_{\hat{\Delta}}\), which in the case +of a single proportion is ((5.14)). This in turn depends on
+the sample size \(n\): the larger this is, the narrower the +interval. Increasing the sample size thus results (other things +being equal) in reduced uncertainty and higher precision.
the true population proportion \(\pi\): the closer this is to 0.5, +the wider the interval. Unlike the sample size, this determinant +of the estimation uncertainty is not in our control.
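To make these points concrete, the sketch below (again in Python, purely for illustration; the sample sizes and proportions are made-up values) evaluates the width \(2\times 1.96\times\sqrt{\hat{\pi}(1-\hat{\pi})/n}\) of the 95% interval for a few combinations of \(n\) and \(\hat{\pi}\).

```python
# Sketch: width of the 95% interval (5.15) for illustrative values of n and pi_hat.
import math

def width(pi_hat, n, z=1.96):
    return 2 * z * math.sqrt(pi_hat * (1 - pi_hat) / n)

for n in (100, 400, 1600):
    print(n, round(width(0.5, n), 3), round(width(0.1, n), 3))
# the interval narrows as n grows, and is widest when the proportion is near 0.5
```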
Opinion polls of the kind illustrated by the referendum example are +probably where non-academic audiences are most likely to encounter +confidence intervals, although not under that label. Media reports of +such polls typically include a margin of error for the results. For +example, in the referendum example it might be reported that 47% of the +respondents said that they would definitely vote Yes, and that “the +study has a margin of error of plus or minus four percentage points”. In +most cases the phrase “margin of error” refers to a 95% confidence +interval. Unless otherwise mentioned, we can thus take a statement like +the one above to mean that the 95% confidence interval for the +proportion of interest is approximately \(47\pm 4\) percentage points. For +a realistic interpretation of the implications of the results, the width +of this interval is at least as important as the point estimate of the +proportion. This is often neglected in media reports of opinion polls, +where the point estimate tends to be headline news, while the margin of +error is typically mentioned only in passing or omitted altogether.
+There are some obvious similarities between the conclusions from significance tests and confidence intervals. For example, a \(z\)-test in the referendum example 5.1 showed that the null hypothesis that the population proportion \(\pi\) was 0.5 was not rejected (\(P=0.112\)). Thus 0.5 is a plausible value for \(\pi\) in light of the observed data. The 95% confidence interval for \(\pi\) showed that, at this level of confidence, plausible values for \(\pi\) are those between 0.433 and 0.507. In particular, these include 0.5, so the confidence interval also indicates that a proportion of 0.5 is plausible. This connection between the test and the confidence interval is in fact exact: a value \(\pi_{0}\) is inside the 95% confidence interval if, and only if, a two-sided test of \(H_{0}:\; \pi=\pi_{0}\) at the 5% level of significance does not reject the null hypothesis.
+The same is true for other matching pairs of levels of significance and +confidence, e.g. for a test with a 1% level of significance and a 99% +(i.e. (100-1)%) confidence interval. In short, the significance test and +the confidence interval will in these cases always give the same answer +about whether or not a parameter value is plausible (consistent with the +data) at a given level of significance/confidence.
+These pairs of a test and an interval are exactly comparable in that +they concern the same population parameter, estimate all parameters in +the same way, use the same sampling distribution for inference, and use +the same level of significance/confidence. Not all tests and confidence +intervals have exact pairs in this way. Also, some tests are for +hypotheses about more than one parameter at once, so there is no +corresponding single confidence interval. Nevertheless, the connection +stated above is useful for understanding the ideas of both tests and +confidence intervals.
+These results also illustrate how confidence +intervals are inherently more informative than significance tests. For +instance, in the jury example 5.2, both the test and the confidence +interval agree on the implausibility of the claim that the population +probability of being selected to the jury panel is the same as the +proportion (0.124) of black people in the population, since the claim +that \(\pi=0.124\) is rejected by the test (with \(P<0.001\)) and outside +the interval \((0.040; 0.052)\). Unlike the test, however, the confidence +interval summarizes the plausibility of all possible values of \(\pi\) +and not just \(\pi_{0}=0.124\). One way to describe this is to consider +what would have happened if we had carried out a series of significance +tests of null hypotheses of the form \(H_{0}: \pi=\pi_{0}\) for a range of +values of \(\pi_{0}\). The confidence interval contains all those values +\(\pi_{0}\) which would not have been rejected by the test, while all the +values outside the interval would have been rejected. Here +\(H_{0}: \pi=\pi_{0}\) would thus not have been rejected at the 5% level +if \(\pi_{0}\) had been between 0.040 and 0.052, and rejected otherwise. +This, of course, is not how significance tests are actually conducted, +but it provides a useful additional interpretation of confidence +intervals.
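The “series of tests” idea described above can be sketched directly, as in the short Python illustration below (not part of the original text). For simplicity the check uses the same estimated standard error as the confidence interval, so that the set of non-rejected values matches the interval exactly; the \(z\)-test statistic of this chapter instead estimates the standard error under the null hypothesis, which would give a very similar but not identical set. The referendum figures (\(\hat{\pi}=0.470\), \(n=702\)) are used.

```python
# Sketch: the 95% interval contains those null values pi_0 that would not be
# rejected at the 5% level (using the interval's own standard error estimate).
import math

pi_hat, n = 0.470, 702
se = math.sqrt(pi_hat * (1 - pi_hat) / n)
not_rejected = [p0 / 1000 for p0 in range(300, 601)
                if abs((pi_hat - p0 / 1000) / se) < 1.96]
print(min(not_rejected), max(not_rejected))   # about 0.434 and 0.506
```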
+A confidence interval is particularly useful when the parameter of +interest is measured in familiar units, such as the proportions +considered so far. We may then try to judge, in substantive terms, how +wide the interval is and how far it is from particular values of +interest. In the jury example the 95% confidence interval ranges from +4.0% to 5.2%, which suggests that the population probability is +estimated fairly precisely by this survey. The interval also reveals +that even its upper bound is less than half of the figure of 12.4% which +would correspond to proportional representation of black people in the jury +pool, a result which suggests quite substantial underrepresentation in +the pool.
+In Examples 5.3 and 5.4, the aim is to compare the proportion of a +dichotomous response variable \(Y\) between two groups of a dichotomous +explanatory variable \(X\):
+Example 5.3: compare the proportion of polio cases among the +unvaccinated (\(\pi_{1}\)) and vaccinated (\(\pi_{2}\)) children.
Example 5.4: compare the proportion of optimistic responses to a +negative (\(\pi_{1}\)) vs. positive wording of the question +(\(\pi_{2}\)).
The quantity of interest is then the population difference
\[\begin{equation}
\Delta=\pi_{2}-\pi_{1}.
\tag{5.17}
\end{equation}\]
For a significance test of this, the null hypothesis will again be \(H_{0}:\; \Delta=0\), which is in this case equivalent to the hypothesis of equal proportions
\[\begin{equation}
H_{0}:\; \pi_{1} = \pi_{2}.
\tag{5.18}
\end{equation}\]
The null hypothesis thus claims that there is no association between the group variable \(X\) and the dichotomous response variable \(Y\), while the alternative hypothesis (e.g. the two-sided one \(H_{a}:\; \pi_{1}\ne \pi_{2}\), i.e. \(H_{a}:\; \Delta\ne 0\)) implies that there is an association.
+The obvious estimates of \(\pi_{1}\) and \(\pi_{2}\) are the corresponding sample proportions \(\hat{\pi}_{1}\) and \(\hat{\pi}_{2}\), calculated from samples of sizes \(n_{1}\) and \(n_{2}\) respectively, and the estimate of \(\Delta\) is then
\[\begin{equation}
\hat{\Delta}=\hat{\pi}_{2} - \hat{\pi}_{1}.
\tag{5.19}
\end{equation}\]
This gives \(\hat{\Delta}=0.000284-0.000706=-0.000422\) in Example 5.3 and \(\hat{\Delta}=0.364-0.279=0.085\) in Example 5.4. In the samples, the proportion of polio cases is thus lower in the vaccinated group, and the proportion of optimistic answers is higher in response to a positively worded question. Note also that although the inference discussed below focuses on the difference of the proportions, for purely descriptive purposes we might prefer to use some other statistic, such as the ratio of the proportions. For example, the difference of 0.000422 in polio incidence between the vaccine and control groups may seem small, because the proportions in both groups are small. A better idea of the magnitude of the contrast is given by their ratio of \(0.000706/0.000284=2.49\) (this is known as the risk ratio). In other words, the rate of polio infection in the unvaccinated group was two and a half times the rate in the vaccinated group.
+The tests and confidence intervals discussed below are again based on the assumption that the relevant sampling distributions are approximately normal, which is true when the sample sizes \(n_{1}\) and \(n_{2}\) are large enough. The conditions for this are not very demanding: one rule of thumb states that the methods described in this section are reasonably valid if, in both groups, the number of observations with \(Y\) equal to 1 and the number with \(Y\) equal to 0 are both more than 5. This condition is satisfied in both of the examples considered here.
+The validity of the test, as well as the amount of information the data +provide about \(\pi_{1}\) and \(\pi_{2}\) in general, thus depends not just +on the overall sample sizes but on having enough observations of both +values of \(Y\). The critical quantity is then the number of observations +in the rarer category of \(Y\). In Example 5.3 this means the numbers of +children diagnosed with polio, because the probability of polio was low +in the study population. The numbers of eventual polio cases were 142 +and 57 in the control and treatment groups respectively, so the rule of +thumb stated above was satisfied. With such low probabilities of polio +incidence, sufficient numbers of cases were achieved only by making the +overall sample sizes \(n_{1}\) and \(n_{2}\) large enough. That is why the +trial had to be very large, involving hundreds of thousands of +participants.
+The standard error of \(\hat{\Delta}\) is
\[\begin{equation}
\sigma_{\hat{\Delta}} =\sqrt{\frac{\pi_{2}(1-\pi_{2})}{n_{2}}+\frac{\pi_{1}(1-\pi_{1})}{n_{1}}}.
\tag{5.20}
\end{equation}\]
As in the one-sample case above, the best way to estimate this is different for a significance test than for a confidence interval. For a test, the standard error can be estimated under the assumption that the null hypothesis ((5.18)) is true, in which case the population proportion is the same in both groups. A good estimate of this common proportion, denoted below by \(\hat{\pi}\), is the proportion of observations with value 1 for \(Y\) in the total sample of \(n_{1}+n_{2}\) observations, pooling observations from both groups together; expressed in terms of the group-specific estimates, this is
\[\begin{equation}
\hat{\pi} = \frac{n_{1}\hat{\pi}_{1}+n_{2}\hat{\pi}_{2}}{n_{1}+n_{2}}.
\tag{5.21}
\end{equation}\]
Using this for both \(\pi_{1}\) and \(\pi_{2}\) in ((5.20)) gives the estimated standard error
\[\begin{equation}
\hat{\sigma}_{\hat{\Delta}}=\sqrt{\hat{\pi}(1-\hat{\pi}) \; \left(\frac{1}{n_{2}}+\frac{1}{n_{1}}\right)},
\tag{5.22}
\end{equation}\]
and using ((5.19)) and ((5.22)) in the general formula ((5.8)) gives the two-sample \(z\)-test statistic for proportions
\[\begin{equation}
z=\frac{\hat{\pi}_{2}-\hat{\pi}_{1}}{\sqrt{\hat{\pi}(1-\hat{\pi})(1/n_{2}+1/n_{1})}}
\tag{5.23}
\end{equation}\]
where \(\hat{\pi}\) is given by ((5.21)). When the null hypothesis is true, the sampling distribution of this test statistic is approximately standard normal when the sample sizes are large enough.
+For a confidence interval, the calculation of the estimated standard error cannot assume that ((5.18)) is true. Instead, we use the estimate
\[\begin{equation}
\hat{\sigma}_{\hat{\Delta}} =\sqrt{\frac{\hat{\pi}_{2}(1-\hat{\pi}_{2})}{n_{2}}+\frac{\hat{\pi}_{1}(1-\hat{\pi}_{1})}{n_{1}}}
\tag{5.24}
\end{equation}\]
and, substituting this into the general formula ((5.16)), we get
\[\begin{equation}
(\hat{\pi}_{2}-\hat{\pi}_{1}) \pm z_{\alpha/2} \;\sqrt{\frac{\hat{\pi}_{2}(1-\hat{\pi}_{2})}{n_{2}}+\frac{\hat{\pi}_{1}(1-\hat{\pi}_{1})}{n_{1}}}
\tag{5.25}
\end{equation}\]
as the confidence interval for \(\Delta=\pi_{2}-\pi_{1}\), with confidence level \(1-\alpha\).
+For an illustration of the calculations, consider Example 5.4. Denoting the group of respondents answering the negatively worded question by 1 and those with the positive question by 2, the basic quantities are \(n_{1}=921\), \(\hat{\pi}_{1}=0.279\), \(n_{2}=929\) and \(\hat{\pi}_{2}=0.364\). The estimated difference in the proportions of respondents giving an optimistic answer is thus
\[\hat{\Delta} = \hat{\pi}_{2}-\hat{\pi}_{1} = 0.364-0.279 = 0.085.\]
For a significance test, the estimated standard error of \(\hat{\Delta}\) uses the pooled estimate ((5.21)) of the population proportion, which is given by
\[\hat{\pi} = \frac{921\times 0.279+929\times 0.364}{921+929}=
\frac{257+338}{921+929} = 0.322.\]
The standard error from ((5.22)) is then
\[\hat{\sigma}_{\hat{\Delta}}
=
\sqrt{0.322\times(1-0.322) \times \left(\frac{1}{929}+\frac{1}{921}\right)}
=
\sqrt{\frac{0.2182}{462.5}}=0.0217,\]
and the test statistic ((5.23)) is
\[z=\frac{0.085}{0.0217}=3.92.\]
For the confidence interval, the standard error of \(\hat{\Delta}\) is estimated from ((5.24)) as
\[\begin{aligned}
\hat{\sigma}_{\hat{\Delta}} &=
\sqrt{\frac{0.364\times (1-0.364)}{929} + \frac{0.279\times (1-0.279)}{921}} \\
&=
\sqrt{\frac{0.2315}{929}+\frac{0.2012}{921}}=0.0216
\end{aligned}\]
and a 95% confidence interval from ((5.25)) is
\[0.085 \pm 1.96 \times 0.0216 = 0.085\pm 0.042 = (0.043; 0.127).\]
The \(P\)-value for the test statistic is clearly very low (in fact about \(0.00009\)), so the null hypothesis of equal proportions is convincingly rejected. There is very strong evidence that the probability that a respondent will give an answer indicating optimism for the future is different for the two differently worded questions. The confidence interval indicates that we are 95% confident that the proportion of optimistic answers is between 4.3 and 12.7 percentage points higher when the question is worded positively than when it is worded negatively. This suggests quite a substantial acquiescence bias arising from changing just one word in the survey question, as described in the introduction to Example 5.4 at the beginning of this chapter.
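The arithmetic above can be checked with a few lines of code. The sketch below (Python, for illustration only) reproduces the Example 5.4 figures using formulas ((5.21)) to ((5.25)).

```python
# Sketch: two-sample z-test and 95% CI for a difference of proportions,
# using the Example 5.4 figures quoted in the text.
import math

n1, p1 = 921, 0.279   # negatively worded question
n2, p2 = 929, 0.364   # positively worded question
diff = p2 - p1

# z-test with the pooled proportion, formulas (5.21)-(5.23)
pooled = (n1 * p1 + n2 * p2) / (n1 + n2)
se_test = math.sqrt(pooled * (1 - pooled) * (1 / n1 + 1 / n2))
print(round(diff / se_test, 2))          # about 3.9, matching z = 3.92 up to rounding

# 95% confidence interval, formulas (5.24)-(5.25)
se_ci = math.sqrt(p2 * (1 - p2) / n2 + p1 * (1 - p1) / n1)
print(round(diff - 1.96 * se_ci, 3), round(diff + 1.96 * se_ci, 3))   # 0.043 0.127
```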
+In Example 5.3, the estimated difference is \(\hat{\Delta}=-0.000422\) (see Table 5.1), i.e. 422 fewer polio cases per million children in the vaccinated group than in the unvaccinated group. Similar calculations as above show that the value of the test statistic is \(z=-6.01\), so the \(P\)-value is again very small (in fact about 0.000000001) and the null hypothesis of equal probabilities is strongly rejected. There is thus overwhelming evidence that the proportion of polio cases was different among the vaccinated children than among the unvaccinated ones. The 95% confidence interval for the difference shows that we are 95% confident that this difference was a reduction of between 284 and 560 polio cases per million children.23 This was acknowledged as a convincing demonstration that the Salk vaccine worked (see Figure 5.2), and it (and later other types of polio vaccination) was soon put to widespread use. The resulting dramatic decline in the incidence of polio is one of the great success stories of modern medicine. Compared to the 199 children with polio in 1954 among the fewer than half a million participants of the vaccine trial alone, in 2014 there were 414 confirmed cases of polio in the whole world (see http://www.polioeradication.org/Dataandmonitoring/Poliothisweek.aspx). There is hope that that number will reach 0 in a not-too-distant future, so that the once devastating disease will one day be entirely eradicated.
+This example is based on a newspaper report of a real poll for +which the percentages were reported only as 47, 27, and 26 out of +“about 700” respondents. The exact numbers used here for +illustration have been made up to correspond to these real results.↩
The Pennsylvania Supreme Court Committee on racial and gender bias +in the justice system; the example used here is from the survey by +J. F. Kairns published as part of the final report of the committee +(March 2003).↩
The data used here are from the official evaluation of the trials in Francis, T. et al. (1955). “An evaluation of the 1954 poliomyelitis vaccine trials: summary report”. American Journal of Public Health, 45, 1–50. For some background information about the trials, see Meldrum, M. (1998), “ ‘A calculated risk’: the Salk polio vaccine field trials of 1954”. British Medical Journal, 317, 1233–1236.↩
Javeline, D. (1999). “Response effects in polite cultures: A test +of acquiescence in Kazakhstan”. Public Opinion Quarterly, 63, +1–28.↩
In this context the letter does not refer to the mathematical +constant \(\pi=3.14159\dots\), for which the same symbol is also used.↩
Note that incidence was not zero even in the vaccinated group, because the Salk vaccine — like most vaccines — is not 100% effective. Despite this, it is possible for a broad enough vaccination programme to eliminate a disease completely, by depriving it of the chance to spread and conferring so-called herd immunity on the whole population. Conversely, if vaccination rates drop too low, herd immunity is removed and the disease may reappear at a higher rate than implied by the reduction in vaccination alone.↩
This chapter continues the theme of analysing statistical associations +between variables. The methods described here are appropriate when the +response variable \(Y\) is a continuous, interval level variable. We will +begin by considering bivariate situations where the only explanatory +variable \(X\) is also a continuous variable. Section +8.2 first discusses graphical and numerical +descriptive techniques for this case, focusing on two very commonly used +tools: a scatterplot of two variables, and a measure of association +known as the correlation coefficient. Section +8.3 then describes methods of statistical +inference for associations between two continuous variables. This is +done in the context of a statistical model known as the simple linear +regression model.
+The ideas of simple linear regression modelling can be extended to a much more general and powerful set of methods known as multiple linear regression models. These can have several explanatory variables, which makes it possible to examine associations between any explanatory variable and the response variable, while controlling for other explanatory variables. An important reason for the usefulness of these models is that they play a key role in statistical analyses which correspond to research questions that are causal in nature. As an interlude, we discuss issues of causality in research design and analysis briefly in Section 8.4. Multiple linear models are then introduced in Section 8.5. The models can also include categorical explanatory variables with any number of categories, as explained in Section 8.6.
+The following example will be used for illustration throughout this +chapter:
+Example 8.1: Indicators of Global Civil Society
+The Global Civil Society 2004/5 yearbook gives tables of a range of +characteristics of the countries of the world.39 The following +measures will be considered in this chapter:
+Gross Domestic Product (GDP) per capita in 2001 (in current +international dollars, adjusted for purchasing power parity)
Income level of the country in three groups used by the +Yearbook, as Low income, Middle income or High income
Income inequality measured by the Gini index (with 0 +representing perfect equality and 100 perfect inequality)
A measure of political rights and civil liberties in 2004, +obtained as the average of two indices for these characteristics +produced by the Freedom House organisation (1 to 7, with higher +values indicating more rights and liberties)
World Bank Institute’s measure of control of corruption for 2002 +(with high values indicating low levels of corruption)
Net primary school enrolment ratio 2000-01 (%)
Infant mortality rate 2001 (% of live births)
We will discuss various associations between these variables. It should be noted that the analyses are mainly illustrative examples, and the choices of explanatory and response variables do not imply any strong claims about causal connections between them. Also, the fact that different measures refer to slightly different years is ignored; in effect, we treat each variable as a measure of the “recent” situation in the countries. The full data set used here includes 165 countries. Many of the variables are not available for all of them, so most of the analyses below use a smaller number of countries.
+Suppose for now that we are considering data on two continuous +variables. The descriptive techniques discussed in this section do not +strictly speaking require a distinction between an explanatory variable +and a response variable, but it is nevertheless useful in many if not +most applications. We will reflect this in the notation by denoting the +variables \(X\) (for the explanatory variable) and \(Y\) (for the response +variable). The observed data consist of the pairs of observations +\((X_{1}, Y_{1}), (X_{2}, Y_{2}), \dots, (X_{n}, Y_{n})\) of \(X\) and \(Y\) +for each of the \(n\) subjects in a sample, or, with more concise +notation, \((X_{i}, Y_{i})\) for \(i=1,2,\dots,n\).
+We are interested in analysing the association between \(X\) and \(Y\). Methods for describing this association in the sample are first described in this section, initially with some standard graphical methods in Section 8.2.2. This leads to a discussion in Section 8.2.3 of what we actually mean by associations in this context, and then to a definition of numerical summary measures for such associations in Section 8.2.4. Statistical inference for the associations will be considered in Section 8.3.
+The standard statistical graphic for summarising the association between +two continuous variables is a scatterplot. An example of it is given +in Figure 8.1, which shows a scatterplot of Control of +corruption against GDP per capita for 61 countries for which the +corruption variable is at least 60 (the motivation of this restriction +will be discussed later). The two axes of the plot show possible values +of the two variables. The horizontal axis, here corresponding to Control +of corruption, is conventionally used for the explanatory variable \(X\), +and is often referred to as the X-axis. The vertical axis, here used +for GDP per capita, then corresponds to the response variable \(Y\), and +is known as the Y-axis.
+The observed data are shown as points in the scatterplot, one for each +of the \(n\) units. The location of each point is determined by its values +of \(X\) and \(Y\). For example, Figure 8.1 highlights the +observation for the United Kingdom, for which the corruption measure +(\(X\)) is 94.3 and GDP per capita (\(Y\)) is $24160. The point for UK is +thus placed at the intersection of a vertical line drawn from 94.3 on +the \(X\)-axis and a horizontal line from 24160 on the \(Y\)-axis, as shown +in the plot.
+The principles of good graphical presentation on clear labelling, avoidance of spurious decoration and so on (c.f. Section 2.8) are the same for scatterplots as for any statistical graphics. Because the crucial visual information in a scatterplot is the shape of the cloud of the points, it is now often not necessary for the scales of the axes to begin at zero, especially if this is well outside the ranges of the observed values of the variables (as it is for the \(X\)-axis of Figure 8.1). Instead, the scales are typically selected so that the points cover most of the plotting surface. This is done by statistical software, but there are many situations where it is advisable to overrule the automatic selection (e.g. for making scatterplots of the same variables in two different samples directly comparable).
+The main purpose of a scatterplot is to examine possible associations +between \(X\) and \(Y\). Loosely speaking, this means considering the shape +and orientation of the cloud of points in the graph. In Figure +8.1, for example, it seems that most of the points are +in a cluster sloping from lower left to upper right. This indicates that +countries with low levels of Control of corruption (i.e. high levels of +corruption itself) tend to have low GDP per capita, and those with +little corruption tend to have high levels of GDP. A more careful +discussion of such associations again relates them to the formal +definition in terms of conditional distributions, and also provides a +basis for the methods of inference introduced later in this chapter. We +will resume the discussion of these issues in Section +8.2.3 below. Before that, however, we will +digress briefly from the main thrust of this chapter in order to +describe a slightly different kind of scatterplot.
+A very common special case of a scatterplot is one where the +observations correspond to measurements of a variable for the same unit +at several occasions over time. This is illustrated by the following +example (another one is Figure 2.9):
+Example: Changes in temperature, 1903–2004
+Figure 8.2 summarises data on average annual temperatures over the past century in five locations. The data were obtained from the GISS Surface Temperature (GISTEMP) database maintained by the NASA Goddard Institute for Space Studies.40 The database contains time series of average monthly surface temperatures from several hundred meteorological stations across the world. The five sites considered here are Haparanda in Northern Sweden, Independence, Kansas in the USA, Choshi on the east coast of Japan, Kimberley in South Africa, and the Base Orcadas Station on Laurie Island, off the coast of Antarctica. These were chosen rather haphazardly for this illustration, with the aim of obtaining a geographically scattered set of rural or small urban locations (to avoid issues with the heating effects of large urban areas). The temperature for each year at each location is here recorded as the difference from the temperature at that location in 1903.41
+Consider first the data for Haparanda only. Here we have two variables, +year and temperature, and 102 pairs of observations of them, one for +each year between 1903 and 2004. These pairs could now be plotted in a +scatterplot as described above. Here, however, we can go further to +enhance the visual effect of the plot. This is because the observations +represent measurements of a variable (temperature difference) for the +same unit (the town of Haparanda) at several successive times (years). +These 102 measurements form a time series of temperature differences +for Haparanda over 1903–2004. A standard graphical trick for such series +is to connect the points for successive times by lines, making it easy +for the eye to follow the changes over time in the variable on the +\(Y\)-axis. In Figure 8.2 this is done for Haparanda +using a solid line. Note that doing this would make no sense for scatter +plots like the one in Figure 8.1, because all the points +there represent different subjects, in that case countries.
+We can easily include several such series in the same graph. In Figure +8.2 this is done by plotting the temperature +differences for each of the five locations using different line styles. +The graph now summarises data on three variables, year, temperature and +location. We can then examine changes over time for any one location, +but also compare patterns of changes between them. Here there is clearly +much variation within and between locations, but also some common +features. Most importantly, the temperatures have all increased over the +past century. In all five locations the average annual temperatures at +the end of the period were around 1–2\(^{\circ}\)C higher than in 1903.
+A set of time series like this is an example of dependent data in the +sense discussed in Section 7.5. There we considered +cases with pairs of observations, where the two observations in each +pair had to be treated as statistically dependent. Here all of the +temperature measurements for one location are dependent, probably with +strongest dependence between adjacent years and less dependence between +ones further apart. This means that we will not be able to analyse these +data with the methods described later in this chapter, because these +assume statistically independent observations. Methods of statistical +modelling and inference for dependent data of the kind illustrated by +the temperature example are beyond the scope of this course. This, +however, does not prevent us from using a plot like Figure +8.2 to describe such data.
+Consider again statistically independent observations of \((X_{i}, Y_{i})\), such as those displayed in Figure 8.1. Recall +the definition that two variables are associated if the conditional +distribution of \(Y\) given \(X\) is different for different values of \(X\). +In the two-sample examples of Chapter 7 this could be +examined by comparing two conditional distributions, since \(X\) had only +two possible values. Now, however, \(X\) has many (in principle, +infinitely many) possible values, so we will need to somehow define and +compare conditional distributions given each of them. We will begin with +a rather informal discussion of how this might be done. This will lead +directly to a more precise and formal definition introduced in Section +8.3.
+Figure 8.3 shows the same scatterplot as Figure +8.1. Consider first one value of \(X\) (Control of +corruption), say 65. To get a rough idea of the conditional distribution +of \(Y\) (GDP per capita) given this value of \(X\), we could examine the +sample distribution of the values of \(Y\) for the units for which the +value of \(X\) is close to 65. These correspond to the points near the +vertical line drawn at \(X=65\) in Figure 8.3. This can be +repeated for any value of \(X\); for example, Figure 8.3 +also includes a vertical reference line at \(X=95\), for examining the +conditional distribution of \(Y\) given \(X=95\).42
+As in Chapter 7, associations between variables will here be +considered almost solely in terms of differences in the means of the +conditional distributions of \(Y\) at different values of \(X\). For +example, Figure 8.3 suggests that the conditional mean +of \(Y\) when X is 65 is around or just under 10000. At \(X=95\), on the +other hand, the conditional mean seems to be between 20000 and 25000. +The mean of \(Y\) is thus higher at the larger value of X. More generally, +this finding is consistent across the scatterplot, in that the +conditional mean of \(Y\) appears to increase when we consider +increasingly large values of \(X\), indicating that higher levels of +Control of corruption are associated with higher average levels of GDP. +This is often expressed by saying that the conditional mean of \(Y\) +increases when we “increase” \(X\).43 This is the sense in which we will +examine associations between continuous variables: does the conditional +mean of \(Y\) change (increase or decrease) when we increase \(X\)? If it +does, the two variables are associated; if it does not, there is no +association of this kind. This definition also agrees with the one +linking association with prediction: if the mean of \(Y\) is different for +different values of \(X\), knowing the value of \(X\) will clearly help us +in making predictions about likely values of \(Y\). Based on the +information in Figure 8.3, for example, our best guesses +of the GDPs of two countries would clearly be different if we were told +that the control of corruption measure was 65 for one country and 95 for +the other.
+The nature of the association between \(X\) and \(Y\) is characterised by +how the values of \(Y\) change when \(X\) increases. First, it is almost +always reasonable to conceive these changes as reasonably smooth and +gradual. In other words, if two values of \(X\) are close to each other, +the conditional means of \(Y\) will be similar too; for example, if the +mean of \(Y\) is 5 when \(X=10\), its mean when \(X=10.01\) is likely to be +quite close to 5 rather than, say, 405. In technical terms, this means +that the conditional mean of \(Y\) will be described by a smooth +mathematical function of \(X\). Graphically, the means of \(Y\) as \(X\) +increases will then trace a smooth curve in the scatterplot. The +simplest possibility for such a curve is a straight line. This +possibility is illustrated by plot (a) of Figure 8.4 +(this and the other five plots in the figure display artificial data, +generated for this illustration). Here all of the points fall on a line, +so that when \(X\) increases, the values of \(Y\) increase at a constant +rate. A relationship like this is known as a linear association +between \(X\) and \(Y\). Linear associations are the starting point for +examining associations between continuous variables, and often the only +ones considered. In this chapter we too will focus almost completely on +them.
+In plot (a) of Figure 8.4 all the points are exactly on +the straight line. This indicates a perfect linear association, where +\(Y\) can be predicted exactly if \(X\) is known, so that the association is +deterministic. Such a situation is neither realistic in practice, nor +necessary for the association to be described as linear. All that is +required for the latter is that the conditional means of \(Y\) given +different values of \(X\) fall (approximately) on a straight line. This is +illustrated by plot (b) of Figure 8.4, which shows a +scatterplot of individual observations together with an approximation of +the line of the means of \(Y\) given \(X\) (how the line was drawn will be +explained later). Here the linear association is not perfect, as the +individual points are not all on the same line but scattered around it. +Nevertheless, the line seems to capture an important systematic feature +of the data, which is that the average values of \(Y\) increase at an +approximately constant rate as \(X\) increases. This combination of +systematic and random elements is characteristic of all statistical +associations, and it is also central to the formal setting for +statistical inference for linear associations described in Section +8.3 below.
+The direction of a linear association can be either positive or +negative. Plots (a) and (b) of Figure 8.4 show a +positive association, because increasing \(X\) is associated with +increasing average values of \(Y\). This is indicated by the upward slope +of the line describing the association. Plot (c) shows an example of a +negative association, where the line slopes downwards and increasing +values of \(X\) are associated with decreasing values of \(Y\). The third +possibility, illustrated by plot (d), is that the line slopes neither up +nor down, so that the mean of \(Y\) is the same for all values of \(X\). In +this case there is no (linear) association between the variables.
+Not all associations between continuous variables are linear, as shown +by the remaining two plots of Figure 8.4. These +illustrate two kinds of nonlinear associations. In plot (e), the +association is still clearly monotonic, meaning that average values of +\(Y\) change in the same direction — here increase — when \(X\) increases. +The rate of this increase, however, is not constant, as indicated by the +slightly curved shape of the cloud of points. The values of \(Y\) seem to +increase faster for small values of \(X\) than for large ones. A straight +line drawn through the scatterplot captures the general direction of the +increase, but misses its nonlinearity. One practical example of such a +relationship is the one between years of job experience and salary: it +is often found that salary increases fastest early on in a person’s +career and more slowly later on.
+Plot (f) shows a nonlinear and nonmonotonic relationship: as \(X\) +increases, average values of \(Y\) first decrease to a minimum, and then +increase again, resulting in a U-shaped scatterplot. A straight line is +clearly an entirely inadequate description of such a relationship. A +nonmonotonic association of this kind might be seen, for example, when +considering the dependence of the failure rates of some electrical +components (\(Y\)) on their age (\(X\)). It might then be that the failure +rates were high early (from quick failures of flawed components) and +late on (from inevitable wear and tear) and lowest in between for +“middle-aged but healthy” components.
+Returning to real data, recall that we have so far considered control of +corruption and GDP per capita only among countries with a Control of +corruption score of at least 60. The scatterplot for these, shown in +Figure 8.3, also includes a best-fitting straight line. +The observed relationship is clearly positive, and seems to be fairly +well described by a straight line. For countries with relatively low +levels of corruption, the association between control of corruption and +GDP can be reasonably well characterised as linear.
+Consider now the set of all countries, including also those with high +levels of corruption (scores of less than 60). In a scatterplot for +them, shown in Figure 8.5, the points with at least 60 +on the \(X\)-axis are the same as those in Figure 8.3, and +the new points are to the left of them. The plot now shows a nonlinear +relationship comparable to the one in plot (e) of Figure +8.4. The linear relationship which was a good +description for the countries considered above is thus not adequate for +the full set of countries. Instead, it seems that the association is +much weaker for the countries with high levels of corruption, +essentially all of which have fairly low values of GDP per capita. The +straight line fitted to the plot identifies the overall positive +association, but cannot describe its nonlinearity. This example further +illustrates how scatterplots can be used to examine relationships +between variables and to assess whether they can be best described as +linear or nonlinear associations.44
+So far we have said nothing about how the exact location and direction +of the straight lines shown in the figures have been selected. These are +determined so that the fitted line is in a certain sense the best +possible one for describing the data in the scatterplot. Because the +calculations needed for this are also (and more importantly) used in the +context of statistical inference for such data, we will postpone a +description of them until Section 8.3.4. For +now we can treat the line simply as a visual summary of the linear +association in a scatterplot.
+A scatterplot is a very powerful tool for examining sample associations +of pairs of variables in detail. Sometimes, however, this is more than +we really need for an initial summary of a data set, especially if there +are many variables and thus many possible pairs of them. It is then +convenient also to be able to summarise each pairwise association using +a single-number measure of association. This section introduces the +correlation coefficient, the most common such measure for continuous +variables. It is a measure of the strength of linear associations of +the kind defined above.
+Suppose that we consider two variables, denoted \(X\) and \(Y\). This again implies a distinction between an explanatory and a response variable, to maintain continuity of notation between different parts of this chapter. The correlation coefficient itself, however, is completely symmetric, so that its value for a pair of variables will be the same whether or not we treat one or the other of them as explanatory for the other. First, recall from the formula for the standard deviation towards the end of Section 2.6.2 that the sample standard deviations of the two variables are calculated as
\[\begin{equation}
s_{x} = \sqrt{\frac{\sum(X_{i}-\bar{X})^{2}}{n-1}} \quad\text{and}\quad s_{y} = \sqrt{\frac{\sum (Y_{i}-\bar{Y})^{2}}{n-1}}
\tag{8.1}
\end{equation}\]
where the subscripts \(x\) and \(y\) identify the two variables, and \(\bar{X}\) and \(\bar{Y}\) are their sample means. A new statistic is the sample covariance between \(X\) and \(Y\), defined as
\[\begin{equation}
s_{xy} = \frac{\sum (X_{i}-\bar{X})(Y_{i}-\bar{Y})}{n-1}.
\tag{8.2}
\end{equation}\]
This is a measure of linear association between \(X\) and \(Y\). It is positive if the sample association is positive and negative if the association is negative.
+In theoretical statistics, covariance is the fundamental summary of sample and population associations between two continuous variables. For descriptive purposes, however, it has the inconvenient feature that its magnitude depends on the units in which \(X\) and \(Y\) are measured. This makes it difficult to judge whether a value of the covariance for particular variables should be regarded as large or small. To remove this complication, we can standardise the sample covariance by dividing it by the standard deviations, to obtain the statistic
\[\begin{equation}
r=\frac{s_{xy}}{s_{x}s_{y}} = \frac{\sum (X_{i}-\bar{X})(Y_{i}-\bar{Y})}{\sqrt{\sum\left(X_{i}-\bar{X}\right)^{2} \sum\left(Y_{i}-\bar{Y}\right)^{2}}}.
\tag{8.3}
\end{equation}\]
This is the (sample) correlation coefficient, or correlation for short, between \(X\) and \(Y\). It is also often (e.g. in SPSS) known as Pearson’s correlation coefficient after Karl Pearson (of the \(\chi^{2}\) test, see first footnote in Chapter 4), although both the word and the statistic are really due to Sir Francis Galton.45
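As a small numerical illustration of formulas ((8.1)) to ((8.3)), the sketch below computes the covariance and correlation for a tiny set of made-up values and checks the result against numpy's built-in function (Python is used here purely for illustration).

```python
# Sketch: sample covariance (8.2) and correlation (8.3) computed directly.
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])   # made-up data
y = np.array([2.1, 2.9, 3.6, 5.2, 5.4])

sx, sy = x.std(ddof=1), y.std(ddof=1)      # standard deviations, formula (8.1)
sxy = ((x - x.mean()) * (y - y.mean())).sum() / (len(x) - 1)   # covariance (8.2)
r = sxy / (sx * sy)                        # correlation (8.3)

print(round(r, 3), round(np.corrcoef(x, y)[0, 1], 3))   # both print 0.98
```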
+The properties of the correlation coefficient can be described by going +through the same list as for the \(\gamma\) coefficient in Section +2.4.5. While doing so, it is useful to refer to the +examples in Figure 8.4, where the correlations are also +shown.
+Sign: Correlation is positive if the linear association +between the variables is positive, i.e. if the best-fitting straight +line slopes upwards (as in plots a, b and e) and negative if the +association is negative (c). A zero correlation indicates complete +lack of linear association (d and f).
Extreme values: The largest possible correlation is \(+1\) +(plot a) and the smallest \(-1\), indicating perfect positive and +negative linear associations respectively. More generally, the +magnitude of the correlation indicates the strength of the +association, so that the closer to \(+1\) or \(-1\) the correlation is, +the stronger the association (e.g. compare plots a–d). It should +again be noted that the correlation captures only the linear aspect +of the association, as illustrated by the two nonlinear cases in +Figure 8.4. In plot (e), there is curvature but +also a strong positive trend, and the latter is reflected in a +fairly high correlation. In plot (f), the trend is absent and the +correlation is 0, even though there is an obvious +nonlinear relationship. Thus the correlation coefficient is a +reasonable initial summary of the strength of association in (e), +but completely misleading in (f).
Formal interpretation: The correlation coefficient cannot be interpreted as a Proportional Reduction in Error (PRE) measure, but its square can. The latter statistic, the so-called coefficient of determination or \(R^{2}\), is described in Section 8.3.3.
Substantive interpretation: As with any measure of association, the question of whether a particular sample correlation is high or low is not a purely statistical question, but depends on the nature of the variables. This can be judged properly only with the help of experience of correlations between similar variables in different contexts. As one very rough rule of thumb it might be said that in many social science contexts correlations greater than 0.4 (or smaller than \(-0.4\)) would typically be considered noteworthy and ones greater than 0.7 quite strong.
Returning to real data, Table 8.1 shows the correlation +coefficients for all fifteen distinct pairs of the six continuous +variables in the Global Civil Society data set mentioned in Example 8.1. This is an example of a correlation matrix, +which is simply a table with the variables as both its rows and columns, +and the correlation between each pair of variables given at the +intersection of corresponding row and column. For example, the +correlation of GDP per capita and School enrolment is here 0.42. This is +shown at the intersection of the first row (GDP) and fifth column +(School enrolment), and also of the fifth row and first column. In +general, every correlation is shown twice in the matrix, once in its +upper triangle and once in the lower. The triangles are separated by a +list of ones on the diagonal of the matrix. This simply indicates that +the correlation of any variable with itself is 1, which is true by +definition and thus of no real interest.
Variable | GDP | Gini | Pol. | Corrupt. | School | IMR |
---|---|---|---|---|---|---|
GDP per capita [GDP] | 1 | -0.39 | 0.51 | 0.77 | 0.42 | -0.62 |
Income inequality [Gini] | -0.39 | 1 | -0.15 | -0.27 | -0.27 | 0.42 |
Political rights [Pol.] | 0.51 | -0.15 | 1 | 0.59 | 0.40 | -0.44 |
Control of corruption [Corrupt.] | 0.77 | -0.27 | 0.59 | 1 | 0.41 | -0.64 |
School enrolment [School] | 0.42 | -0.27 | 0.40 | 0.41 | 1 | -0.73 |
Infant mortality [IMR] | -0.62 | 0.42 | -0.44 | -0.64 | -0.73 | 1 |
All of the observed associations in this example are in unsurprising +directions. For example, School enrolment is positively correlated with +GDP, Political rights and Control of corruption, and negatively +correlated with Income inequality and Infant mortality. In other words, +countries with large percentages of children enrolled in primary school +tend to have high levels of GDP per capita and of political rights and +civil liberties, and low levels of corruption, income inequality and +infant mortality. The strongest associations in these data are between +GDP per capita and Control of corruption (\(r=0.77\)) and School enrolment +and Infant mortality rate (\(r=-0.73\)), and the weakest between Income +inequality on the one hand and Political rights, Control of corruption +and School enrolment on the other (correlations of \(-0.15\), \(-0.27\) and +\(-0.27\) respectively).
+These correlations describe only the linear element of sample +associations, but give no hint of any nonlinear ones. For example, the +correlation of 0.77 between GDP and Control of corruption summarises the +way the observations cluster around the straight line shown in Figure +8.5. The correlation is high because this increase in +GDP as Control of corruption increases is quite strong, but it gives no +indication of the nonlinearity of the association. A scatterplot is +needed for revealing this feature of the data. The correlation for the +restricted set of countries shown in Figure 8.3 is 0.82.
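In practice a correlation matrix like Table 8.1 would be produced by software. As a sketch of how this might look outside SPSS, the short Python example below builds a matrix of pairwise correlations for a small made-up data set (the variable names and numbers are invented for illustration only).

```python
# Sketch: pairwise Pearson correlations arranged as a correlation matrix.
import pandas as pd

data = pd.DataFrame({
    "gdp":    [24160, 1860, 9550, 3120, 28230],   # made-up values
    "school": [99, 71, 93, 80, 100],
    "imr":    [0.5, 7.6, 1.9, 4.1, 0.4],
})
print(data.corr().round(2))   # symmetric, with 1s on the diagonal
```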
+A correlation coefficient can also be defined for the joint population +distribution of two variables. The sample correlation \(r\) can then be +treated as an estimate of the population correlation, which is often +denoted by \(\rho\) (the lower-case Greek “rho”). Statistical inference +for the population correlation can also be derived. For example, SPSS +automatically outputs significance tests for the null hypothesis that +\(\rho\) is 0, i.e. that there is no linear association between \(X\) and +\(Y\) in the population. Here, however, we will not discuss this, choosing +to treat \(r\) purely as a descriptive sample statistic. The next section +provides a different set of tools for inference on population +associations.
+The rest of this course is devoted to the method of linear regression modelling. Its purpose is the analysis of associations in cases where the response variable is a continuous, interval level variable, and where the explanatory variables, of which there may be several, can be of any type. We begin in this section with simple linear regression, where there is only one explanatory variable. We will further assume that this is also continuous. The situation considered here is thus the same as in the previous section, but here the focus will be on statistical inference rather than description. Most of the main concepts of linear regression can be introduced in this context. Those that go beyond it are described in subsequent sections. Section 8.5 introduces multiple regression involving more than one explanatory variable. The use of categorical explanatory variables in such models is explained in Section 8.6. Finally, Section 8.7 gives a brief review of some further aspects of linear regression modelling which are not covered on this course.
+Example: Predictors of Infant Mortality Rate
+The concepts of linear regression models will be illustrated as they are +introduced with a second example from the Global Civil Society data set. +The response variable will now be Infant Mortality Rate (IMR). This is +an illuminating outcome variable, because it is a sensitive and +unquestionably important reflection of a country’s wellbeing; whatever +we mean by “development”, it is difficult to disagree that high levels +of it should coincide with low levels of infant mortality. We will +initially consider only one explanatory variable, Net primary school +enrolment ratio, referred to as “School enrolment” for short. This is +defined as the percentage of all children of primary school age who are +enrolled in school. Enrolment numbers and the population size are often +obtained from different official sources, which sometimes leads to +discrepancies. In particular, School enrolment for several countries is +recorded as over 100, which is logically impossible. This is an +illustration of the kinds of measurement errors often affecting +variables in the social sciences. We will use the School enrolment +values as recorded, even though they are known to contain some error.
+A scatterplot of IMR vs. School enrolment is shown in Figure +8.6, together with the best-fitting straight line. Later we +will also consider three additional explanatory variables: Control of +corruption, Income inequality and Income level of the country in three +categories (c.f. Example 8.1). For further reference, +Table 8.2 shows various summary statistics for these +variables. Throughout, the analyses are restricted to those 111 +countries for which all of the five variables are recorded. For this +reason the correlations in Table 8.2 differ slightly from +those in Table 8.1, where each correlation was calculated +for all the countries with non-missing values of that pair of variables.
 | IMR | School enrolment | Control of corruption | Income inequality |
---|---|---|---|---|
Summary statistics |  |  |  |  |
Mean | 4.3 | 86.1 | 50.1 | 40.5 |
std. deviation | 4.0 | 16.7 | 28.4 | 10.2 |
Minimum | 0.3 | 30.0 | 3.6 | 24.4 |
Maximum | 15.6 | 109.0 | 100.0 | 70.7 |
Correlation matrix |  |  |  |  |
IMR | 1 | -0.75 | -0.60 | 0.39 |
School enrolment | -0.75 | 1 | 0.39 | -0.27 |
Control of corruption | -0.60 | 0.39 | 1 | -0.27 |
Income inequality | 0.39 | -0.27 | -0.27 | 1 |
Means for countries in different income categories |  |  |  |  |
Low income (\(n=41\)) | 8.2 | 72.1 | 27.5 | 41.7 |
Middle income (\(n=48\)) | 2.8 | 92.5 | 50.8 | 43.3 |
High income (\(n=22\)) | 0.5 | 98.4 | 90.7 | 32.0 |
The simple linear regression model defined in this section is a +statistical model for a continuous, interval level response variable \(Y\) +given a single explanatory variable \(X\), such as IMR given School +enrolment. The model will be used to carry out statistical inference on +the association between the variables in a population (which in the IMR +example is clearly again of the conceptual variety).
+For motivation, recall first the situation considered in Section +7.3. There the data consisted of observations +\((Y_{i}, X_{i})\) for \(i=1,2,\dots,n\), which were assumed to be statistically +independent. The response variable \(Y\) was continuous but \(X\) had only +two possible values, coded 1 and 2. A model was then set up where the +population distribution of \(Y\) had mean \(\mu_{1}\) and variance +\(\sigma^{2}_{1}\) for units with \(X=1\), and mean \(\mu_{2}\) and variance +\(\sigma^{2}_{2}\) when \(X=2\). In some cases it was further assumed that +the population distributions were both normal, and that the population +variances were equal, i.e. that \(\sigma^{2}_{1}=\sigma^{2}_{2}\), with +their common value denoted \(\sigma^{2}\). With these further assumptions, +which will also be used here, the model for \(Y\) given a dichotomous \(X\) +stated that (1) observations for different units \(i\) were statistically +independent; (2) each \(Y_{i}\) was sampled at random from a population +distribution which was normal with mean \(\mu_{i}\) and variance +\(\sigma^{2}\); and (3) \(\mu_{i}\) depended on \(X_{i}\) so that it was equal +to \(\mu_{1}\) if \(X_{i}\) was 1 and \(\mu_{2}\) if \(X_{i}\) was 2.
The situation in this section is exactly the same, except that \(X\) is now continuous instead of dichotomous. We will use the same basic model, but will change the specification of the conditional mean \(\mu_{i}\) appropriately. In the light of the discussion in previous sections of this chapter, it is no surprise that this will be defined in such a way that it describes a linear association between \(X\) and \(Y\). This is done by setting \(\mu_{i}=\alpha+\beta X_{i}\), where \(\alpha\) and \(\beta\) are unknown population parameters. This is the equation of a straight line (we will return to it in the next section). With this specification, the model for observations \((Y_{1},X_{1}), (Y_{2}, X_{2}), \dots, (Y_{n}, X_{n})\) becomes
+Observations for different units \(i\) (\(=1,2,\dots,n\)) are +statistically independent.
Each \(Y_{i}\) is normally distributed with mean \(\mu_{i}\) and +variance \(\sigma^{2}\).
The means \(\mu_{i}\) depend on \(X_{i}\) through \(\mu_{i}=\alpha+\beta X_{i}\).
Often the model is expressed in an equivalent form where 2. and 3. are +combined as +\[\begin{equation} +Y_{i}=\alpha+\beta X_{i} +\epsilon_{i} +\tag{8.4} +\end{equation}\] +where each \(\epsilon_{i}\) is normally distributed +with mean 0 and variance \(\sigma^{2}\). The \(\epsilon_{i}\) are known as +error terms or population residuals (and the letter \(\epsilon\) +is the lower-case Greek “epsilon”). This formulation of the model +clearly separates the mean of \(Y_{i}\), which traces the straight line +\(\alpha+\beta X_{i}\) as \(X_{i}\) changes, from the variation around that +line, which is described by the variability of \(\epsilon_{i}\).
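To make the role of the error terms concrete, the short sketch below generates artificial data from a model of the form (8.4). It is written in Python purely for illustration (SPSS is the software used on this course), and the parameter values for \(\alpha\), \(\beta\) and \(\sigma\) are made up, not estimates from the data.

```python
import numpy as np

rng = np.random.default_rng(451)

# Made-up parameter values, chosen purely for illustration (not estimates from the data)
alpha, beta, sigma = 20.0, -0.2, 2.5
n = 111

x = rng.uniform(30, 100, size=n)          # explanatory variable values
epsilon = rng.normal(0, sigma, size=n)    # error terms, normal with mean 0 and sd sigma
y = alpha + beta * x + epsilon            # responses scatter around the line alpha + beta*x
```

Each simulated response is the point on the line \(\alpha+\beta X\) plus a normally distributed error, which is exactly the separation of "line" and "variation around the line" described above.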
+The model defined above is known as the simple linear regression +model:
+Simple because it has only one explanatory variable, as opposed +to multiple linear regression models which will have more +than one.
Linear because it specifies a linear association between \(X\) and +\(Y\).46
Regression: This is now an established part of the name of the +model, although the origins of the word are not central to the use +of the model.47
Model, because this is a statistical model in the sense +discussed in the middle of Section 6.3.1. In other words, the model is +always only a simplified abstraction of the true, immeasurably +complex processes which determine the values of \(Y\). Nevertheless, +it is believed that a well-chosen model can be useful for explaining +and predicting observed values of \(Y\). This spirit is captured by +the well-known statement by the statistician George Box:48
> All models are wrong, but some are useful.
A model like this has the advantage that it reduces the examination +of associations in the population to estimation and inference on a +small number of model parameters, in the case of the simple linear +regression model just \(\alpha\), \(\beta\) and \(\sigma^{2}\).
Of course, not all models are equally appropriate for given data, and +some will be both wrong and useless. The results from a model should +thus be seriously presented and interpreted only if the model is deemed +to be reasonably adequate. For the simple linear regression model, this +can be partly done by examining whether the scatterplot between \(X\) and +\(Y\) appears to be reasonably consistent with a linear relationship. Some +further comments on the assessment of model adequacy will be given in +Section 8.7.
+The simple linear regression model ((8.4)) has three +parameters, \(\alpha\), \(\beta\) and \(\sigma^{2}\). Each of these has its +own interpretation, which are explained in this section. Sometimes it +will be useful to illustrate the definition with specific numerical +values, for which we will use ones for the model for IMR given School +enrolment in our example. SPSS output for this model is shown in Figure +8.7. Note that although these values are first used here +to illustrate the interpretation of the population parameters in the +model, they are of course only estimates (of a kind explained in the +next section) of those parameters. Other parts of the SPSS output will +be explained later in this chapter.
+According to the model, the conditional mean (also often known as the +conditional expected value) of \(Y\) given \(X\) in the population is +(dropping the subscript \(i\) for now for notational simplicity) +\(\mu=\alpha+\beta X\). The two parameters \(\alpha\) and \(\beta\) in this +formula are known as regression coefficients. They are interpreted +as follows:
+\(\alpha\) is the expected value of \(Y\) when \(X\) is equal to 0. It is +known as the intercept or constant term of the model.
\(\beta\) is the change in the expected value of \(Y\) when \(X\) +increases by 1 unit. It is known as the slope term or the +coefficient of \(X\).
Just to include one mathematical proof in this coursepack, these results +can be derived as follows:
+When \(X=0\), the mean of \(Y\) is +\(\mu=\alpha+\beta X=\alpha+\beta\times 0 =\alpha+0=\alpha\).
Compare two observations, one with value \(X\) of the explanatory +variable, and the other with one unit more, i.e. \(X+1\). The +corresponding means of \(Y\) are
|  |  |  |  |
|---|---|---|---|
| with \(X+1\): | \(\mu\) | \(=\alpha+\beta\times (X+1)\) | \(=\alpha+\beta X +\beta\) |
| with \(X\): | \(\mu\) |  | \(=\alpha+\beta X\) |
| Difference: |  |  | \(\beta\) |
which completes the proof of the claims above — Q.E.D. In case you +prefer a graphical summary, this is given in Figure +8.8.
+The most important parameter of the model, and usually the only one +really discussed in interpreting the results, is \(\beta\), the regression +coefficient of \(X\). It is also called the slope because it is literally +the slope of the regression line, as shown in Figure +8.8. It is the only parameter in the model which +describes the association between \(X\) and \(Y\), and it does so in the +above terms of expected changes in \(Y\) corresponding to changes in X +(\(\beta\) is also related to the correlation between \(X\) and \(Y\), in a +way explained in the next section). The sign of \(\beta\) indicates the +direction of the association. When \(\beta\) is positive (greater than 0), +the regression line slopes upwards and increasing \(X\) thus also +increases the expected value of \(Y\) — in other words, the association +between \(X\) and \(Y\) is positive. This is the case illustrated in Figure +8.8. If \(\beta\) is negative, the regression line +slopes downwards and the association is also negative. Finally, if +\(\beta\) is zero, the line is parallel with the \(X\)-axis, so that +changing \(X\) does not change the expected value of \(Y\). Thus \(\beta=0\) +corresponds to no (linear) association between \(X\) and \(Y\).
+In the real example shown in Figure 8.7, \(X\) is School +enrolment and \(Y\) is IMR. In SPSS output, the estimated regression +coefficients are given in the “Coefficients” table in the column +labelled “B” under “Unstandardized coefficients”. The estimated constant +term \(\alpha\) is given in the row labelled “(Constant)”, and the slope +term on the next row, labelled with the name or label of the explanatory +variable as specified in the SPSS data file — here “Net primary school +enrolment ratio 2000-2001 (%)”. The value of the intercept is here +19.736 and the slope coefficient is \(-0.179\). The estimated regression +line for expected IMR is thus \(19.736-0.179 X\), where \(X\) denotes School +enrolment. This is the line shown in Figure 8.6.
+Because the slope coefficient in the example is negative, the +association between the variables is also negative, i.e. higher levels +of school enrolment are associated with lower levels of infant +mortality. More specifically, every increase of one unit (here one +percentage point) in School enrolment is associated with a decrease of +0.179 units (here percentage points) in expected IMR.
+Since the meaning of \(\beta\) is related to a unit increase of the +explanatory variable, the interpretation of its magnitude depends on +what those units are. In many cases one unit of \(X\) is too small or too +large for convenient interpretation. For example, a change of one +percentage point in School enrolment is rather small, given that the +range of this variable in our data is 79 percentage points (c.f. Table +8.2). In such cases the results can easily be reexpressed by +using multiples of \(\beta\): specifically, the effect on expected value +of \(Y\) of changing \(X\) by \(A\) units is obtained by multiplying \(\beta\) +by \(A\). For instance, in our example the estimated effect of increasing +School enrolment by 10 percentage points is to decrease expected IMR by +\(10\times 0.179=1.79\) percentage points.
+The constant term \(\alpha\) is a necessary part of the model, but it is +almost never of interest in itself. This is because the expected value +of \(Y\) at \(X=0\) is rarely specifically interesting. Very often \(X=0\) is +also unrealistic, as in our example where it corresponds to a country +with zero primary school enrolment. There are fortunately no such +countries in the data, where the lowest School enrolment is 30%. It is +then of no interest to discuss expected IMR for a hypothetical country +where no children went to school. Doing so would also represent +unwarranted extrapolation of the model beyond the range of the +observed data. Even though the estimated linear model seems to fit +reasonably well for these data, this is no guarantee that it would do so +also for countries with much lower school enrolment, even if they +existed.
+The third parameter of the simple regression model is \(\sigma^{2}\). This +is the variance of the conditional distribution of \(Y\) given \(X\). It is +also known as the conditional variance of \(Y\), the error +variance or the residual variance. Similarly, its square root +\(\sigma\) is known as the conditional, error or residual standard +deviation. To understand \(\sigma\), let us consider a single value of +\(X\), such as one corresponding to one of the vertical dashed lines in +Figure 8.8 or, say, school enrolment of 85 in Figure +8.6. The model specifies a distribution for \(Y\) given any such +value of \(X\). If we were to (hypothetically) collect a large number of +observations, all with this same value of \(X\), the distribution of \(Y\) +for them would describe the conditional distribution of \(Y\) given that +value of \(X\). The model states that the average of these values, +i.e. the conditional mean of \(Y\), is \(\alpha+\beta X\), which is the +point on the regression line corresponding to \(X\). The individual values +of \(Y\), however, would of course not all be on the line but somewhere +around it, some above and some below.
+The linear regression model further specifies that the form of the +conditional distribution of \(Y\) is approximately normal. You can try to +visualise this by imagining a normal probability curve (c.f. Figure +6.5) on the vertical line from \(X\), centered on the regression +line and sticking up from the page. The bell shape of the curve +indicates that most of the values of \(Y\) for a given \(X\) will be close +to the regression line, and only small proportions of them far from it. +The residual standard deviation \(\sigma\) is the standard deviation of +this conditional normal distribution, in essence describing how tightly +concentrated values of \(Y\) tend to be around the regression line. The +model assumes, mainly for simplicity, that the same value of \(\sigma\) +applies to the conditional distributions at all values of \(X\); this is +known as the assumption of homoscedasticity.
+In SPSS output, an estimate of \(\sigma\) is given in the “Model +Summary” table under the misleading label “Std. Error of the +Estimate”. An estimate of the residual variance \(\sigma^{2}\) is found +also in the “ANOVA” table under “Mean Square” for “Residual”. In our +example the estimate of \(\sigma\) is 2.6173 (and that of \(\sigma^{2}\) is +6.85). This is usually not of direct interest for interpretation, but it +will be a necessary component of some parts of the analysis discussed +below.
+Since the regression coefficients \(\alpha\) and \(\beta\) and the residual +standard deviation \(\sigma\) are unknown population parameters, we will +need to use the observed data to obtain sensible estimates for them. How +to do so is now less obvious than in the cases of simple means and +proportions considered before. This section explains the standard method +of estimation for the parameters of linear regression models.
+We will denote estimates of \(\alpha\) and \(\beta\) by \(\hat{\alpha}\) and +\(\hat{\beta}\) (“alpha-hat” and “beta-hat”) respectively (other notations +are also often used, e.g. \(a\) and \(b\)). Similarly, we can define +\[\hat{Y}=\hat{\alpha}+\hat{\beta} X\] for \(Y\) given any value of \(X\). +These are the values on the estimated regression line. They are known as +fitted values for \(Y\), and estimating the parameters of the +regression model is often referred to as “fitting the model” to the +observed data. The fitted values represent our predictions of expected +values of \(Y\) given \(X\), so they are also known as predicted values +of \(Y\).
In particular, fitted values \(\hat{Y}_{i}=\hat{\alpha}+\hat{\beta}X_{i}\) can be calculated at the values \(X_{i}\) of the explanatory variable \(X\) for each unit \(i\) in the observed sample. These can then be compared to the corresponding values \(Y_{i}\) of the response variable. Their differences \(Y_{i}-\hat{Y}_{i}\) are known as the (sample) residuals. These quantities are illustrated in Figure 8.9. This shows a fitted regression line, which is in fact the one for IMR given School enrolment also shown in Figure 8.6. Also shown are two points \((X_{i}, Y_{i})\). These are also from Figure 8.6; the rest have been omitted to simplify the plot. The point further to the left is the one for Mali, which has School enrolment \(X_{i}=43.0\) and IMR \(Y_{i}=14.1\). Using the estimated coefficients \(\hat{\alpha}=19.736\) and \(\hat{\beta}=-0.179\) in Figure 8.7, the fitted value for Mali is \(\hat{Y}_{i}=19.736-0.179\times 43.0=12.0\). Their difference is the residual \(Y_{i}-\hat{Y}_{i}=14.1-12.0=2.1\). Because the observed value is here larger than the fitted value, the residual is positive and the observed value is above the fitted line, as shown in Figure 8.9.
+The second point shown in Figure 8.9 corresponds to the +observation for Ghana, for which \(X_{i}=58.0\) and \(Y_{i}=5.7\). The +fitted value is then \(\hat{Y}_{i}=19.736-0.179\times 58.0=9.4\) and the +residual \(Y_{i}-\hat{Y}_{i}=5.7-9.4=-3.7\). Because the observed value is +now smaller than the fitted value, the residual is negative and the +observed \(Y_{i}\) is below the fitted regression line.
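The arithmetic for these two fitted values and residuals can be reproduced with a few lines of code. The sketch below is illustrative Python only (the course itself uses SPSS); the coefficients are the estimates quoted from Figure 8.7 and the country values are those given in the text.

```python
# Estimated coefficients quoted from Figure 8.7
alpha_hat, beta_hat = 19.736, -0.179

# (School enrolment X, observed IMR Y) for the two countries discussed in the text
countries = {"Mali": (43.0, 14.1), "Ghana": (58.0, 5.7)}

for name, (x, y) in countries.items():
    fitted = alpha_hat + beta_hat * x      # Y-hat = alpha-hat + beta-hat * X
    residual = y - fitted                  # observed minus fitted
    print(f"{name}: fitted {fitted:.1f}, residual {residual:.1f}")
```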
+So far we have still not explained how the specific values of the +parameter estimates in Figure 8.7 were obtained. In +doing so, we are faced with the task of identifying a regression line +which provides the best fit to the observed points in a scatterplot like +Figure 8.6. Each possible choice of \(\hat{\alpha}\) and +\(\hat{\beta}\) corresponds to a different regression line, and some +choices are clearly better than others. For example, it seems +intuitively obvious that it would be better for the line to go through +the cloud of points rather than stay completely outside it. To make such +considerations explicit, the residuals can be used as a criterion of +model fit. The aim will then be to make the total magnitude of the +residuals as small as possible, so that the fitted line is as close as +possible to the observed points \(Y_{i}\) in some overall sense. This +cannot be done simply by adding up the residuals, because they can have +different signs, and positive and negative residuals could thus cancel +out each other in the addition. As often before, the way around this is +to remove the signs by considering the squares of the residuals. Summing +these over all units \(i\) in the sample leads to the sum of squared +residuals \[SSE = \sum (Y_{i}-\hat{Y}_{i})^{2}.\] Here \(SSE\) is short +for Sum of Squares of Errors (it is also often called the Residual Sum +of Squares or \(RSS\)). This is the quantity used as the criterion in +estimating regression coefficients for a linear model. Different +candidate values for \(\hat{\alpha}\) and \(\hat{\beta}\) lead to different +values of \(\hat{Y}_{i}\) and thus of \(SSE\). The final estimates are the +ones which give the smallest value of \(SSE\). Their formulas are +\[\begin{equation} +\hat{\beta}=\frac{\sum (X_{i}-\bar{X})(Y_{i}-\bar{Y})}{\sum (X_{i}-\bar{X})^{2}}=\frac{s_{xy}}{s_{x}^{2}} +\tag{8.5} +\end{equation}\] +and +\[\begin{equation} +\hat{\alpha}=\bar{Y}-\hat{\beta}\bar{X} +\tag{8.6} +\end{equation}\] +where \(\bar{Y}\), \(\bar{X}\), \(s_{x}\) and \(s_{xy}\) are the +usual sample means, standard deviations and covariances for \(Y\) and \(X\). +These are known as the least squares estimates of the regression +coefficients (or as Ordinary Least Squares or OLS estimates), and the +reasoning used to obtain them is the method of least squares.49 +Least squares estimates are almost always used for linear regression +models, and they are the ones displayed by SPSS and other software. For +our model for IMR given School enrolment, the estimates are the +\(\hat{\alpha}=19.736\) and \(\hat{\beta}=-0.179\) shown in Figure +8.7.
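Formulas (8.5) and (8.6) translate directly into code. The following sketch is again illustrative Python rather than anything you need on the course (SPSS does this calculation for you); x and y are placeholder names for any observed explanatory and response variables.

```python
import numpy as np

def least_squares(x, y):
    """Least squares estimates (8.5) and (8.6) for a simple linear regression of y on x."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    beta_hat = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)
    alpha_hat = y.mean() - beta_hat * x.mean()
    return alpha_hat, beta_hat
```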
+The estimated coefficients can be used to calculate predicted values for +\(Y\) at any values of \(X\), not just those included in the observed +sample. For instance, in the infant mortality example the predicted IMR +for a country with School enrolment of 80% would be +\(\hat{Y}=19.736-0.179\times 80=5.4\). Such predictions should usually be +limited to the range of values of \(X\) actually observed in the data, and +extrapolation beyond these values should be avoided.
+The most common estimate of the remaining parameter of the model, the +residual standard deviation \(\sigma\), is +\[\begin{equation} +\hat{\sigma}=\sqrt{\frac{\sum \left ( Y_{i}-\hat{Y}_{i}\right ) ^{2}}{n- \left ( k+1 \right )}}=\sqrt{\frac{SSE}{n- \left ( k+1 \right )}} +\tag{8.7} +\end{equation}\] +where \(k\) is here set equal to 1. This bears an +obvious resemblance to the formula for the basic sample standard +deviation, shown for \(Y_{i}\) in ((8.1)). One difference to that +formula is that the denominator of ((8.7)) is shown as +\(n-(k+1)\) rather than \(n-1\). Here \(k=1\) is the number of explanatory +variables in the model, and \(k+1=2\) is the number of regression +coefficients (\(\alpha\) and \(\beta\)) including the constant term +\(\alpha\). The quantity \(n-(k+1)\), i.e. here \(n-2\), is the degrees of +freedom (\(df\)) of the parameter estimates. We will need it again in +the next section. It is here given in the general form involving the +symbol \(k\), so that we can later refer to the same formula for models +with more explanatory variables and thus \(k\) greater than 1. In SPSS +output, the degrees of freedom are shown in the “ANOVA” table under +“df” for “Residual”. In the infant mortality example \(n=111\), \(k=1\) and +\(df=111-2=109\), as shown in Figure 8.7.
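Formula (8.7) can be sketched in the same way (again illustrative Python, not part of the course's SPSS workflow); here k is the number of explanatory variables, so k = 1 for the simple model.

```python
import numpy as np

def residual_sd(y, fitted, k=1):
    """Estimated residual standard deviation (8.7); k is the number of explanatory variables."""
    y = np.asarray(y, dtype=float)
    fitted = np.asarray(fitted, dtype=float)
    sse = np.sum((y - fitted) ** 2)    # sum of squared residuals, SSE
    df = len(y) - (k + 1)              # degrees of freedom, n - (k+1)
    return np.sqrt(sse / df)
```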
+Finally, two connections between previous topics and the parameters +\(\hat{\alpha}\), \(\hat{\beta}\) and \(\hat{\sigma}\) are worth highlighting:
+The estimated slope \(\hat{\beta}\) from ((8.5)) is related to +the sample correlation \(r\) from ((8.3)) by +\(r=(s_{x}/s_{y})\,\hat{\beta}\). In both of these it is \(\hat{\beta}\) +which carries information about the association between \(X\) and \(Y\). +The ratio \(s_{x}/s_{y}\) serves only to standardise the correlation +coefficient so that it is always between \(-1\) and \(+1\). The slope +coefficient \(\hat{\beta}\) is not standardised, and the +interpretation of its magnitude depends on the units of measurement +of \(X\) and \(Y\) in the way defined in Section +8.3.3.
Suppose we simplify the simple linear regression model +((8.4)) further by setting \(\beta=0\), thus removing +\(\beta X\) from the model. The new model states that all \(Y_{i}\) are +normally distributed with the same mean \(\alpha\) and standard +deviation \(\sigma\). Apart from the purely notational difference of +using \(\alpha\) instead of \(\mu\), this is exactly the single-sample +model considered in Section 7.4. Using the methods +of this section to obtain estimates of the two parameters of this +model also leads to exactly the same results as before. The least +squares estimate of \(\alpha\) is then \(\hat{\alpha}=\bar{Y}\), +obtained by setting \(\hat{\beta}=0\) in ((8.6)). Since there is +no \(\hat{\beta}\) in this case, \(\hat{Y}_{i}=\bar{Y}\) for all +observations, \(k=0\) and \(df=n-(k+1)=n-1\). Substituting these into +((8.7)) shows that \(\hat{\sigma}\) is then equal to the +usual sample standard deviation \(s_{y}\) of \(Y_{i}\).
The coefficient of determination, more commonly known as +\(\mathbf{R^{2}}\) (“R-squared”), is a measure of association very often +used to describe the results of linear regression models. It is based on +the same idea of sums of squared errors as least squares estimation, and +on comparison of them between two models for \(Y\). The first of these +models is the very simple one where the explanatory variable \(X\) is not +included at all. As discussed above, the estimate of the expected value +of \(Y\) is then the sample mean \(\bar{Y}\). This is the best prediction of +\(Y\) we can make, if the same predicted value is to be used for all +observations. The error in the prediction of each value \(Y_{i}\) in the +observed data is then \(Y_{i}-\bar{Y}\) (c.f. Figure 8.9 for +an illustration of this for one observation). The sum of squares of +these errors is \(TSS=\sum (Y_{i}-\bar{Y})^{2}\), where \(TSS\) is short for +“Total Sum of Squares”. This can also be regarded as a measure of the +total variation in \(Y_{i}\) in the sample (note that \(TSS/(n-1)\) is +the usual sample variance \(s^{2}_{y}\)).
+When an explanatory variable \(X\) is included in the model, the predicted +value for each \(Y_{i}\) is \(\hat{Y}_{i}=\hat{\alpha}+\hat{\beta}X_{i}\), +the error in this prediction is \(Y_{i}-\hat{Y}_{i}\), and the error sum +of squares is \(SSE=\sum (Y_{i}-\hat{Y}_{i})^{2}\). The two sums of +squares are related by +\[\begin{equation} +\sum (Y_{i}-\bar{Y})^{2} =\sum (Y_{i}-\hat{Y}_{i})^{2} +\sum(\hat{Y}_{i}-\bar{Y})^{2}. +\tag{8.8} +\end{equation}\] +Here \(SSM=\sum (\hat{Y}_{i}-\bar{Y})^{2}=TSS-SSE\) is +the “Model sum of squares”. It is the reduction in squared prediction +errors achieved when we make use of \(X_{i}\) to predict values of \(Y_{i}\) +with the regression model, instead of predicting \(\bar{Y}\) for all +observations. In slightly informal language, \(SSM\) is the part of the +total variation \(TSS\) “explained” by the fitted regression model. In +this language, ((8.8)) can be stated as
| Total variation of \(Y\) | = | Variation explained by regression | + | Unexplained variation |
|---|---|---|---|---|
| \(TSS\) | \(=\) | \(SSM\) | \(+\) | \(SSE\) |
The \(R^{2}\) statistic is defined as +\[\begin{equation} +R^{2}= \frac{TSS-SSE}{TSS} = 1-\frac{SSE}{TSS}=1-\frac{\sum (Y_{i}-\hat{Y}_{i})^{2}}{\sum (Y_{i}-\bar{Y})^{2}}. +\tag{8.9} +\end{equation}\] +This is the proportion of the total variation of \(Y\) in +the sample explained by the fitted regression model. Its smallest +possible value is 0, which is obtained when \(\hat{\beta}=0\), so that \(X\) +and \(Y\) are completely unassociated, \(X\) provides no help for predicting +\(Y\), and thus \(SSE=TSS\). The largest possible value of \(R^{2}\) is 1, +obtained when \(\hat{\sigma}=0\), so that the observed \(Y\) can be +predicted perfectly from the corresponding \(X\) and thus \(SSE=0\). More +generally, \(R^{2}\) is somewhere between 0 and 1, with large values +indicating strong linear association between \(X\) and \(Y\).
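The quantities in (8.8) and (8.9) are also easy to compute directly. The sketch below (illustrative Python only) calculates \(TSS\), \(SSE\), \(SSM\) and \(R^{2}\) from observed and fitted values.

```python
import numpy as np

def r_squared(y, fitted):
    """Coefficient of determination (8.9), computed from observed and fitted values."""
    y = np.asarray(y, dtype=float)
    fitted = np.asarray(fitted, dtype=float)
    tss = np.sum((y - y.mean()) ** 2)   # total sum of squares
    sse = np.sum((y - fitted) ** 2)     # error (residual) sum of squares
    ssm = tss - sse                     # model sum of squares, as in (8.8)
    return ssm / tss                    # equivalently 1 - sse/tss
```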
+\(R^{2}\) is clearly a Proportional Reduction of Error (PRE) measure of +association of the kind discussed in Section 2.4.5, +with \(E_{1}=TSS\) and \(E_{2}=SSE\) in the notation of equation for the PRE measure +of association in Section 2.4.5. It is also related to the correlation coefficient. In +simple linear regression, \(R^{2}\) is the square of the correlation \(r\) +between \(X_{i}\) and \(Y_{i}\). Furthermore, the square root of \(R^{2}\) is +the correlation between \(Y_{i}\) and the fitted values \(\hat{Y}_{i}\). +This quantity, known as the multiple correlation coefficient and +typically denoted \(R\), is always between 0 and 1. It is equal to the +correlation \(r\) between \(X_{i}\) and \(Y_{i}\) when \(r\) is positive, and +the absolute value (removing the \(-\) sign) of \(r\) when \(r\) is negative. +For example, for our infant mortality model \(r=-0.753\), +\(R^{2}=r^{2}=0.567\) and \(R=\sqrt{R^{2}}=0.753\).
+In SPSS output, the “ANOVA” table shows the model, error and total +sums of squares \(SSM\), \(SSE\) and \(TSS\) in the “Sum of Squares column”, +on the “Regression”, “Residual” and “Total” rows respectively. \(R^{2}\) +is shown in “Model summary” under “R Square” and multiple +correlation \(R\) next to it as “R”. Figure 8.7 shows +these results for the model for IMR given School enrolment. Here +\(R^{2}=0.567\). Using each country’s level of school enrolment to predict +its IMR thus reduces the prediction errors by 56.7% compared to the +situation where the predicted IMR is the overall sample mean (here 4.34) +for every country. Another conventional way of describing this \(R^{2}\) +result is to say that the variation in rates of School enrolment +explains 56.7% of the observed variation in Infant mortality rates.
\(R^{2}\) is a useful statistic with a convenient interpretation. However, its importance should not be exaggerated. \(R^{2}\) is rarely the only or the most important part of the model results. This may be the case if the regression model is fitted solely for the purpose of predicting future observations of the response variable. More often, however, we are at least as interested, if not more, in examining the nature and strength of the associations between the response variable and the explanatory variable (later, variables), in which case the regression coefficients are the main parameters of interest. This point is worth emphasising because in our experience many users of linear regression models tend to place far too much importance on \(R^{2}\), often hoping to treat it as the ultimate measure of the goodness of the model. We are frequently asked questions along the lines of “My model has \(R^{2}\) of 0.42 — is that good?”. The answer tends to be “I have no idea” or, at best, “It depends”. This is not a sign of ignorance, because it really does depend:
+Which values of \(R^{2}\) are large or small or “good” is not a +statistical question but a substantive one, to which the answer +depends on the nature of the variables under consideration. For +example, most associations between variables in the social sciences +involve much unexplained variation, so their \(R^{2}\) values tend to +be smaller than for quantities in, say, physics. Similarly, even in +social sciences models for aggregates such as countries often have +higher values of \(R^{2}\) than ones for characteristics of +individual people. For example, the \(R^{2}=0.567\) in our infant +mortality example (let alone the \(R^{2}=0.753\) we will achieve for a +multiple linear model for IMR in Section 8.6) +would be unachievably high for many types of individual-level data.
In any case, achieving large \(R^{2}\) is usually not the ultimate +criterion for selecting a model, and a model can be very useful +without having a large \(R^{2}\). The \(R^{2}\) statistic reflects the +magnitude of the variation around the fitted regression line, +corresponding to the residual standard deviation \(\hat{\sigma}\). +Because this is an accepted part of the model, \(R^{2}\) is not a +measure of how well the model fits: we can have a model which is +essentially true (in that \(X\) is linearly associated with \(Y\)) but +has large residual standard error and thus small \(R^{2}\).
The only parameter of the simple linear regression model for which we will describe methods of statistical inference is the slope coefficient \(\beta\). Tests and confidence intervals for population values of the intercept \(\alpha\) are rarely substantively interesting, and ones about the residual standard deviation \(\sigma\) almost never are, so they will not be considered. Similarly, the only null hypothesis on \(\beta\) discussed here is that its value is zero, i.e.
\[\begin{equation}
H_{0}:\; \beta=0.
\tag{8.10}
\end{equation}\]
Recall that when \(\beta\) is 0, there is no linear association between the explanatory variable \(X\) and the response variable \(Y\). Graphically, this corresponds to a regression line in the population which is parallel to the \(X\)-axis (see plot (d) of Figure 8.4 for an illustration of such a line in a sample). The hypothesis ((8.10)) can thus be expressed in words as
\[\begin{equation}
H_{0}:\; \textit{There is no linear association between } X \textit{ and } Y \textit{ in the population}.
\tag{8.11}
\end{equation}\]
Tests of this are usually carried out against a two-sided alternative hypothesis \(H_{a}: \; \beta\ne 0\), and we will also concentrate on this case.
+Formulation ((8.11)) implies that the hypothesis that +\(\beta=0\) is equivalent to one that the population correlation \(\rho\) +between \(X\) and \(Y\) is also 0. The test statistic presented below for +testing ((8.10)) is also identical to a common test +statistic for \(\rho=0\). A test of \(\beta=0\) can thus be interpreted also +as a test of no correlation in the population.
+The tests and confidence intervals involve both the estimate +\(\hat{\beta}\) and its estimated standard error, which we will here +denote \(\hat{\text{se}}(\hat{\beta})\).50 It is calculated as +\[\begin{equation} +\hat{\text{se}}(\hat{\beta})=\frac{\hat{\sigma}}{\sqrt{\sum\left(X_{i}-\bar{X}\right)^{2}}}=\frac{\hat{\sigma}}{s_{x}\sqrt{n-1}} +\tag{8.12} +\end{equation}\] +where \(\hat{\sigma}\) is the estimated residual standard +deviation given by ((8.7)), and \(s_{x}\) is the sample +standard deviation of \(X\). The standard error indicates the level of +precision with which \(\hat{\beta}\) estimates the population parameter +\(\beta\). The last expression in ((8.12)) shows that the sample +size \(n\) appears in the denominator of the standard error formula. This +means that the standard error becomes smaller as the sample size +increases. In other words, the precision of estimation increases when +the sample size increases, as with all the other estimates of population +parameters we have considered before. In SPSS output, the estimated +standard error is given under “Std. Error” in the “Coefficients” +table. Figure 8.7 shows that +\(\hat{\text{se}}(\hat{\beta})=0.015\) for the estimated coefficient +\(\hat{\beta}\) of School enrolment.
+The test statistic for the null hypothesis ((8.10)) is once +again of the general form (see the beginning of Section 5.5.2), i.e. a point estimate +divided by its standard error. Here this gives +\[\begin{equation} +t=\frac{\hat{\beta}}{\hat{\text{se}}(\hat{\beta})}. +\tag{8.13} +\end{equation}\] +The logic of this is the same as in previous +applications of the same idea. Since the null hypothesis +((8.10)) claims that the population \(\beta\) is zero, values +of its estimate \(\hat{\beta}\) far from zero will be treated as evidence +against the null hypothesis. What counts as “far from zero” depends on +how precisely \(\beta\) is estimated from the observed data by +\(\hat{\beta}\) (i.e. how much uncertainty there is in \(\hat{\beta}\)), so +\(\hat{\beta}\) is standardised by dividing by its standard error to +obtain the test statistic.
+When the null hypothesis ((8.10)) is true, the sampling +distribution of the test statistic ((8.13)) is a \(t\) distribution +with \(n-2\) degrees of freedom (i.e. \(n-(k+1)\) where \(k=1\) is the number +of explanatory variables in the model). The \(P\)-value for the test +against a two-sided alternative hypothesis \(\beta\ne 0\) is then the +probability that a value from a \(t_{n-2}\) distribution is at least as +far from zero as the value of the observed test statistic. As for the +tests of one and two means discussed in Chapter 7, it would +again be possible to consider a large-sample version of the test which +relaxes the assumption that \(Y_{i}\) given \(X_{i}\) are normally +distributed, and uses (thanks to the Central Limit Theorem again) the +standard normal distribution to obtain the \(P\)-value. With linear +regression models, however, the \(t\) distribution version of the test is +usually used and included in standard computer output, so only it will +be discussed here. The difference between \(P\)-values from the \(t_{n-2}\) +and standard normal distributions is in any case minimal when the sample +size is reasonably large (at least 30, say).
+In the infant mortality example shown in Figure 8.7, the +estimated coefficient of School enrolment is \(\hat{\beta}=-0.179\), and +its estimated standard error is \(\hat{\text{se}}(\hat{\beta})=0.015\), so +the test statistic is \[t=\frac{-0.179}{0.015}=-11.94\] (up to some +rounding error). This is shown in the “t” column of the +“Coefficients” table. The \(P\)-value, obtained from the \(t\) +distribution with \(n-2=109\) degrees of freedom, is shown in the “Sig.” +column. Here \(P<0.001\), so the null hypothesis is clearly rejected. The +data thus provide very strong evidence that primary school enrolment is +associated with infant mortality rate in the population.
In many analyses, rejecting the null hypothesis of no association will be entirely unsurprising. The question of interest is then not whether there is an association in the population, but how strong it is. This question is addressed with the point estimate \(\hat{\beta}\), combined with a confidence interval which reflects the level of uncertainty in \(\hat{\beta}\) as an estimate of the population parameter \(\beta\). A confidence interval for \(\beta\) with the confidence level \(1-\alpha\) is given by
\[\begin{equation}
\hat{\beta} \pm t_{\alpha/2}^{(n-2)} \, \hat{\text{se}}(\hat{\beta})
\tag{8.14}
\end{equation}\]
where the multiplier \(t_{\alpha/2}^{(n-2)}\) is obtained from the \(t_{n-2}\) distribution as in previous applications of \(t\)-based confidence intervals (c.f. the description in Section 7.3.4). For a 95% confidence interval (i.e. one with \(\alpha=0.05\)) in the infant mortality example, the multiplier is \(t_{0.025}^{(109)}=1.98\), and the endpoints of the interval are
\[-0.179-1.98\times 0.015=-0.209 \quad\text{and}\quad -0.179+1.98\times 0.015=-0.149.\]
These are also shown in the last two columns of the “Coefficients” table of SPSS output. In this example we are thus 95% confident that the expected change in IMR associated with an increase of one percentage point in School enrolment is a decrease of between 0.149 and 0.209 percentage points. If you are calculating this confidence interval by hand, it is (if the sample size is at least 30) again acceptable to use the multiplier 1.96 from the standard normal distribution instead of the \(t\)-based multiplier. Here this would give the confidence interval \((-0.208; -0.150)\).
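For readers who like to check such calculations, the sketch below reproduces the test statistic (8.13), its two-sided \(P\)-value and the confidence interval (8.14) from the rounded estimate and standard error quoted above, so its output agrees with the text only up to rounding error. It is illustrative Python, not part of the SPSS-based course workflow.

```python
from scipy import stats

beta_hat, se_beta = -0.179, 0.015   # estimate and standard error quoted from Figure 8.7
n, k = 111, 1
df = n - (k + 1)                    # 109 degrees of freedom

t_stat = beta_hat / se_beta                              # test statistic (8.13)
p_value = 2 * stats.t.sf(abs(t_stat), df)                # two-sided P-value
t_mult = stats.t.ppf(0.975, df)                          # multiplier for a 95% interval, about 1.98
ci = (beta_hat - t_mult * se_beta, beta_hat + t_mult * se_beta)   # interval (8.14)
ci_ten = (10 * ci[0], 10 * ci[1])   # interval for a ten-point increase in School enrolment
```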
+It is often more convenient to interpret the slope coefficient in terms +of larger or smaller increments in \(X\) than one unit. As noted earlier, +a point estimate for the effect of this is obtained by multiplying +\(\hat{\beta}\) by the appropriate constant. A confidence interval for it +is calculated similarly, by multiplying the end points of an interval +for \(\hat{\beta}\) by the same constant. For example, the estimated +effect of a 10-unit increase in School enrolment is \(10\times \hat{\beta}=-1.79\), and a 95% confidence interval for this is \(10\times (-0.209; -0.149)=(-2.09; -1.49)\). In other words, we are 95% confident +that the effect is a decrease of between 2.09 and 1.49 percentage +points.
> Felix, qui potuit rerum cognoscere causas,
> atque metus omnis et inexorabile fatum
> subiecit pedibus strepitumque Acherontis avari
>
> Blessed is he whose mind had power to probe
> The causes of things and trample underfoot
> All terrors and inexorable fate
> And the clamour of devouring Acheron
>
> (Publius Vergilius Maro: Georgica (37-30 BCE), 2.490-492; translation by L. P. Wilkinson)
These verses from Virgil’s Georgics are the source of the LSE motto — +“Rerum cognoscere causas”, or “To know the causes of things” — which you +can see on the School’s coat of arms on the cover of this coursepack. As +the choice of the motto suggests, questions on causes and effects +are of great importance in social and all other sciences. Causal +connections are the mechanisms through which we try to understand and +predict what we observe in the world, and the most interesting and +important research questions thus tend to involve claims about causes +and effects.
+We have already discussed several examples of statistical analyses of +associations between variables. Association is not the same as +causation, as two variables can be statistically associated without +being in any way directly causally related. Finding an association is +thus not sufficient for establishing a causal link. It is, however, +necessary for such a claim: if two variables are not associated, they +will not be causally connected either. This implies that examination of +associations must be a part of any analysis aiming to obtain conclusions +about causal effects.
+Definition and analysis of causal effects are considered in more detail +on the course MY400 and in much greater depth still on MY457. Here we +will discuss only the following simplified empirical version of the +question.51 Suppose we are considering two variables \(X\) and \(Y\), and +suspect that \(X\) is a cause of \(Y\). To support such a claim, we must be +able to show that the following three conditions are satisfied:
+There is a statistical association between \(X\) and \(Y\).
An appropriate time order: \(X\) comes before \(Y\).
All alternative explanations for the association are ruled out.
The first two conditions are relatively straightforward, at least in +principle. Statistical associations are examined using the kinds of +techniques covered on this course, and decisions about whether or not +there is an association are usually made largely with the help of +statistical inference. Note also that making statistical associations +one of the conditions implies that this empirical definition of causal +effects is not limited to deterministic effects, where a particular +value of \(X\) always leads to exactly the same value of \(Y\). Instead, we +consider probabilistic causal effects, where changes in \(X\) make +different values of \(Y\) more or less likely. This is clearly crucial in +the social sciences, where hardly any effects are even close to +deterministic.
The second condition is trivial in many cases where \(X\) must logically precede \(Y\) in time: for example, a person’s sex is clearly determined before his or her income at age 20. In other cases the order is less obvious: for example, if we consider the relationship between political attitudes and readership of different newspapers, it may not be clear whether attitude came before choice of paper or vice versa. Clarifying the time order in such cases requires careful research design, often involving measurements taken at several different times.
The really difficult condition is the third one. The list of “all alternative explanations” is essentially endless, and we can hardly ever be sure that all of them have been “ruled out”. Most of the effort and ingenuity in research design and analysis in a study of any causal hypothesis usually goes into finding reasonably convincing ways of eliminating even the most important alternative explanations. Here we will discuss only one general class of such explanations, that of spurious associations due to common causes of \(X\) and \(Y\). Suppose that we observe an association, here denoted symbolically by \(X\) — \(Y\), and would like to claim that this implies a causal connection \(X\longrightarrow Y\). One situation where such a claim is not justified is when both \(X\) and \(Y\) are caused by a third variable \(Z\), as in the graph in Figure 8.10. If we here consider only \(X\) and \(Y\), they will appear to be associated, but the connection is not a causal one. Instead, it is a spurious association induced by the dependence of both variables on the common cause \(Z\).
To illustrate a spurious association with a silly but memorable teaching example, suppose that we examine a sample of house fires in London, and record the number of fire engines sent to each incident (\(X\)) and the amount of damage caused by the fire (\(Y\)). There will be a strong association between \(X\) and \(Y\), with large numbers of fire engines associated with large amounts of damage. It is also reasonably clear that the number of fire engines is determined before the final extent of damage. The first two conditions discussed above are thus satisfied. We would, however, be unlikely to infer from this that the relationship is causal and conclude that we should try to reduce the cost of fires by dispatching fewer fire engines to them. This is because the association between the number of fire engines and the amount of damage is due to both of them being influenced by the size of the fire (\(Z\)). Here this is of course obvious, but in most real research questions possible spurious associations are less easy to spot.
+How can we then rule out spurious associations due to some background +variables \(Z\)? The usual approach is to try to remove the association +between \(X\) and \(Z\). This means in effect setting up comparisons between +units which have different values of \(X\) but the same or similar values +of \(Z\). Any differences in \(Y\) can then more confidently be attributed +to \(X\), because they cannot be due to differences in \(Z\). This approach +is known as controlling for other variables \(Z\) in examining the +association between \(X\) and \(Y\).
+The most powerful way of controlling for background variables is to +conduct a randomized experiment, where the values of the explanatory +variable \(X\) can be set by the researcher, and are assigned at random to +the units in the study. For instance, of the examples considered in +Chapters 5 and 7, Examples 5.3, 5.4 and 7.3 are +randomized experiments, each with an intervention variable \(X\) with two +possible values (placebo or real vaccine, one of two forms of a survey +question, and police officer wearing or not wearing sunglasses, +respectively). The randomization assures that units with different +values of \(X\) are on average similar in all variables \(Z\) which +precede \(X\) and \(Y\), thus ruling out the possibility of spurious +associations due to such variables.
+Randomized experiments are for practical or ethical reasons infeasible +in many contexts, especially in the social sciences. We may then resort +to other, less powerful research designs which help to control for some +background variables. This, however, is usually only partially +successful, so we may also need methods of control applied at the +analysis stage of the research process. This is known as statistical +control. The aim of statistical control is to estimate and test +associations between \(X\) and \(Y\) while effectively holding the values of +some other variables \(Z\) constant in the analysis. When the response +variable is continuous, the most common way of doing this is the method +of multiple linear regression which is described in the next section. +When all the variables are categorical, one simple way of achieving the +control is analysis of multiway contingency tables, which is described +in Chapter 9.
+Simple linear regression becomes multiple linear regression when more +than one explanatory variable is included in the model. How this is done +is explained in Section 8.5.2 below. The +definition of the model builds directly on that of the simple linear +model, and most of the elements of the multiple model are either +unchanged or minimally modified from the simple one. As a result, we can +in Section 8.5.3 cover much of the +multiple linear model simply by referring back to the descriptions in +Section 8.3. One aspect of the model is, however, +conceptually expanded when there are multiple explanatory variables, and +requires a careful discussion. This is the meaning of the regression +coefficients of the explanatory variables. The interpretation of and +inference for these parameters are discussed in Section +8.5.4. The crucial part of this +interpretation, and the main motivation for considering multiple +regression models, is that it is one way of implementing the ideas of +statistical control in analyses for continuous response variables.
The concepts will be illustrated with a further example from the Global Civil Society data set. The response variable will still be the Infant mortality rate of a country, and there will be three explanatory variables: School enrolment, Control of corruption and Income inequality as measured by the Gini index (see Example 8.1). Results for this model are shown in Table 8.3, to which we will refer throughout this section. The table is also an example of the kind of format in which results of regression models are typically reported. Presenting raw computer output such as that in Figure 8.7 is normally not appropriate in formal research reports.
| Explanatory variable | Coefficient | Standard error | \(t\) | \(P\)-value | 95% Confidence interval |
|---|---|---|---|---|---|
| Constant | 16.40 |  |  |  |  |
| School enrolment (%) | -0.139 | 0.014 | -9.87 | \(<0.001\) | (-0.167; -0.111) |
| Control of corruption | -0.046 | 0.008 | -5.53 | \(<0.001\) | (-0.062; -0.029) |
| Income inequality | 0.055 | 0.022 | 2.50 | 0.014 | (0.011; 0.098) |
Having multiple explanatory variables requires a slight extension of +notation. Let us denote the number of explanatory variables in the model +by \(k\); in our example \(k=3\). Individual explanatory variables are then +denoted with subscripts as \(X_{1}\), \(X_{2}\), …, \(X_{k}\), in the example +for instance as \(X_{1}=\) School enrolment, \(X_{2}=\) Control of +corruption and \(X_{3}=\) Income inequality. Observations for individual +units \(i\) (with values \(i=1,2,\dots,n\)) in the sample are indicated by a +further subscript, so that \(X_{1i}, X_{2i}, \dots, X_{ki}\) denote the +observed values of the \(k\) explanatory variables for unit \(i\).
+The multiple linear regression model is essentially the same as the +simple linear model. The values \(Y_{i}\) of the response variable in the +sample are again assumed to be statistically independent, and each of +them is regarded as an observation sampled from a normal distribution +with mean \(\mu_{i}\) and variance \(\sigma^{2}\). The crucial change is +that the expected values \(\mu_{i}\) now depend on the multiple +explanatory variables through +\[\begin{equation} +\mu_{i} = \alpha +\beta_{1}X_{1i}+\beta_{2}X_{2i}+\dots+\beta_{k}X_{ki} +\tag{8.15} +\end{equation}\] +where the coefficients +\(\beta_{1}, \beta_{2}, \dots, \beta_{k}\) of individual explanatory +variables are now also identified with subscripts. As in +((8.4)) for the simple linear model, the multiple model can +also be expressed in the concise form +\[\begin{equation} +Y_{i} = \alpha+\beta_{1}X_{1i}+\beta_{2}X_{2i}+\dots+\beta_{k}X_{ki}+\epsilon_{i} +\tag{8.16} +\end{equation}\] +where the error term (population residual) +\(\epsilon_{i}\) is normally distributed with mean 0 and variance +\(\sigma^{2}\).
+The expected value of \(Y\) as defined in ((8.15)) is a linear +function of \(X_{1}, X_{2}, \dots, X_{k}\). If there are two explanatory +variables (\(k=2\)), \(\mu\) is described by a flat plane as \(X_{1}\) and +\(X_{2}\) take different values (think of a flat sheet of paper, at an +angle and extended indefinitely in all directions, cutting across a room +in the air). A plane is the two-dimensional generalisation of a +one-dimensional straight line. The actual observations of \(Y_{i}\) now +correspond to points in a three-dimensional space, and they are +generally not on the regression plane (think of them as a swarm of bees +in the air, some perhaps sitting on that sheet of paper but most +hovering above or below it). When \(k\) is larger than 2, the regression +surface is a higher-dimensional linear object known as a hyperplane. +This is impossible to visualise in our three-dimensional world, but +mathematically the idea of the model remains unchanged. In each case, +the observed values of \(Y\) exist in a yet higher dimension, so they +cannot in general be predicted exactly even with multiple explanatory +variables. A regression plane defined by several \(X\)-variables does +nevertheless allow for more flexibility for \(\mu_{i}\) than a straight +line, so it is in general possible to predict \(Y_{i}\) more accurately +with a multiple regression model than a simple one. This, however, is +not usually the only or main criterion for selecting a good regression +model, for reasons discussed in Section +8.5.4 below.
As mentioned at the beginning of this section, most elements of the multiple linear regression model are the same as, or very similar to, those for the simple model, and require little further explanation:
+The constant term (intercept) \(\alpha\) is interpreted as the +expected value of \(Y\) when all of the explanatory variables have the +value 0. This can be seen by setting \(X_{1i}, X_{2i}, \dots, X_{ki}\) +all to 0 in ((8.15)). As before, \(\alpha\) is rarely of +any substantive interest. In the example in Table 8.3, +the estimated value of \(\alpha\) is 16.40.
The residual standard deviation \(\sigma\) is the standard +deviation of the conditional distribution of \(Y\) given the values of +all of \(X_{1}, X_{2}, \dots, X_{k}\). It thus describes the magnitude +of the variability of \(Y\) around the regression plane. The model +assumes that \(\sigma\) is the same at all values of the explanatory +variables. In Table 8.3, the estimate of \(\sigma\) +is 2.23.
Estimates of the regression coefficients are here denoted with hats as \(\hat{\alpha}\) and \(\hat{\beta}_{1}, \hat{\beta}_{2}, \dots, \hat{\beta}_{k}\), and fitted (predicted) values for \(Y_{i}\) are given by
\[\hat{Y}_{i}=\hat{\alpha}+\hat{\beta}_{1}X_{1i}+\hat{\beta}_{2}X_{2i}+\dots+\hat{\beta}_{k}X_{ki}.\]
The estimated regression coefficients are again obtained with the method of least squares by finding the values for \(\hat{\alpha}, \hat{\beta}_{1}, \hat{\beta}_{2}, \dots, \hat{\beta}_{k}\) which make the error sum of squares \(SSE=\sum (Y_{i}-\hat{Y}_{i})^{2}\) as small as possible. This is both mathematically and intuitively the same exercise as least squares estimation for a simple linear model, except with more dimensions: here we are finding the best-fitting hyperplane through a high-dimensional cloud of points rather than the best-fitting straight line through a two-dimensional scatterplot.
+With more than one explanatory variable, the computational formulas +for the estimates become difficult to write down52 and +essentially impossible to calculate by hand. This is not a problem +in practice, as they are easily computed by statistical software +such as SPSS. In Table 8.3, the least squares estimates +of the regression coefficients are shown in the +“Coefficient” column. Each row of the table gives the coefficient +for one explanatory variable, identified in the first column. A +similar format is adopted in SPSS output, where the +“Coefficients” table looks very similar to the main part of +Table 8.3. The arrangement of other parts of SPSS output +for multiple linear regression is essentially unchanged from the +format shown in Figure 8.7.
Predicted values for \(Y\) can be calculated from the formula for \(\hat{Y}_{i}\) above for any set of values of the explanatory variables (whether those observed in the data or not, as long as extrapolation outside their observed ranges is avoided). This is often very useful for illustrating the implications of a fitted model (a short computational sketch of this is given after this list). For example, Table 8.2 shows that the sample averages of the explanatory variables in Table 8.3 are approximately 86 for School enrolment (\(X_{1}\)), 50 for Control of corruption (\(X_{2}\)) and 40 for Income inequality (\(X_{3}\)). The predicted IMR for a hypothetical “average” country with all these values would be
\[\hat{Y}=16.4-0.139\times 86-0.046\times 50+0.055\times 40=4.35\]
using the estimated intercept \(\hat{\alpha}=16.4\), and the estimated regression coefficients \(\hat{\beta}_{1}=-0.139\), \(\hat{\beta}_{2}=-0.046\) and \(\hat{\beta}_{3}=0.055\) for \(X_{1}\), \(X_{2}\) and \(X_{3}\). For further illustration, we might compare this to other predicted values calculated for, say, different combinations of large and/or small values of the explanatory variables.
The estimated residual standard error \(\hat{\sigma}\) is again +calculated from ((8.7)), using the appropriate value of +\(k\). Here \(n=111\) and \(k=3\), and so the degrees of freedom are +\(df=n-(k+1)=n-4=107\). The estimate is \(\hat{\sigma}=2.23\).
The explanation of the coefficient of determination \(R^{2}\) is +entirely unchanged from the one given under “Coefficient of determination (\(R^{2}\))” in Section 8.3.4. It is +still calculated with the formula ((8.9)), and its interpretation +is also the same. The \(R^{2}\) statistic thus describes the +proportion of the sample variation in \(Y\) explained by the +regression model, i.e. by the variation in the +explanatory variables. Similarly, the multiple correlation +coefficient \(R=\sqrt{R^{2}}\) is again the correlation between the +observed \(Y_{i}\) and the fitted values \(\hat{Y}_{i}\). In our +example, \(R^{2}=0.692\) (and \(R=\sqrt{0.692}=0.832\)), i.e. about +69.2% of the observed variation in IMR between countries is +explained by the variation in levels of School enrolment, Control of +corruption and Income inequality between them. Compared to the +\(R^{2}=0.567\) for the simple regression model in Figure +8.7, adding the two new explanatory variables has +increased \(R^{2}\) by 12.5 percentage points, which seems like a +reasonably large increase.
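As a purely illustrative counterpart to the SPSS output summarised in Table 8.3 (SPSS remains the tool used on this course), the Python sketch below shows how a least squares fit, the fitted values, \(R^{2}\) and the residual standard deviation could be computed for any response vector y and matrix X of explanatory variables; the function and variable names are placeholders. The final lines simply re-do the "average country" prediction with the estimates quoted in Table 8.3.

```python
import numpy as np

def fit_linear_model(X, y):
    """Least squares fit of y on the columns of X, with a constant term added."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    design = np.column_stack([np.ones(len(y)), X])         # first column corresponds to alpha
    coefs, *_ = np.linalg.lstsq(design, y, rcond=None)     # (alpha-hat, beta1-hat, ..., betak-hat)
    fitted = design @ coefs
    sse = np.sum((y - fitted) ** 2)
    tss = np.sum((y - y.mean()) ** 2)
    r2 = 1 - sse / tss                                     # R-squared, as in (8.9)
    sigma_hat = np.sqrt(sse / (len(y) - design.shape[1]))  # residual sd, df = n - (k+1)
    return coefs, fitted, r2, sigma_hat

# Re-doing the "average country" prediction with the estimates quoted in Table 8.3:
coefs = np.array([16.4, -0.139, -0.046, 0.055])
x_average = np.array([1.0, 86, 50, 40])   # constant, School enrolment, Control of corruption, Income inequality
print(coefs @ x_average)                  # about 4.35, as in the text
```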
The concept of statistical control was outlined in Section +8.4 above. In essence, its idea is to examine +the association between a response variable and a particular explanatory +variable, while holding all other explanatory variables at constant +values. This is useful for assessing claims about causal effects, but +also more broadly whenever we want to analyse associations free of the +confounding effects of other variables.
+When all of the variables were categorical, statistical control could be +carried out obviously and transparently by considering partial tables, +where the control variables are literally held constant. This is not +possible when some of the control variables are continuous, because they +then have too many different values for it to be feasible to consider +each one separately. Instead, statistical control is implemented with +the help of a multiple regression model, and interpreted in terms of the +regression coefficients.
Consider, for example, a linear regression model with three explanatory variables \(X_{1}\), \(X_{2}\) and \(X_{3}\). This specifies the expected value of \(Y\) as
\[\begin{equation}
\mu=\alpha+\beta_{1}X_{1}+\beta_{2}X_{2}+\beta_{3}X_{3}
\tag{8.17}
\end{equation}\]
for any values of \(X_{1}\), \(X_{2}\) and \(X_{3}\). Suppose now that we consider a second observation, which has the same values of \(X_{1}\) and \(X_{2}\) as before, but the value of \(X_{3}\) larger by one unit, i.e. \(X_{3}+1\). The expected value of \(Y\) is now
\[\begin{equation}
\mu=\alpha+\beta_{1}X_{1}+\beta_{2}X_{2}+\beta_{3}(X_{3}+1)=\alpha+\beta_{1}X_{1}+\beta_{2}X_{2}+\beta_{3}X_{3}+\beta_{3}.
\tag{8.18}
\end{equation}\]
Subtracting ((8.17)) from ((8.18)) leaves us with \(\beta_{3}\). In other words, \(\beta_{3}\) is the change in expected value of \(Y\) when \(X_{3}\) is increased by one unit, while keeping the values of \(X_{1}\) and \(X_{2}\) unchanged. The same result would obviously be obtained for \(X_{1}\) and \(X_{2}\), and for models with any number of explanatory variables. Thus, in general, the coefficient \(\beta_{j}\) of any explanatory variable \(X_{j}\) in a multiple linear regression model is interpreted as the change in the expected value of \(Y\) when \(X_{j}\) is increased by one unit, while holding the values of all the other explanatory variables in the model constant.
+When there is only one explanatory variable, the “while holding…” part +is omitted and the interpretation becomes the one for simple linear +regression in Section 8.3.3.
+This interpretation of the regression coefficients is obtained by +“increasing by one unit” and “holding constant” values of explanatory +variables by mathematical manipulations alone. It is thus true within +the model even when the values of the explanatory variables are not and +cannot actually be controlled and set at different values by the +researcher. This, however, also implies that this appealing +interpretation is a mathematical construction which does not +automatically correspond to reality. In short, the interpretation of the +regression coefficients is always mathematically true, but whether it is +also an approximately correct description of an association in the real +world depends on the appropriateness of the model for the data and study +at hand. In some studies it is indeed possible to manipulate at least +some explanatory variables, and corresponding regression models can then +help to draw reasonably strong conclusions about associations between +variables. Useful results can also be obtained in studies where no +variables are in our control (so-called observational studies), as +long as the model is selected carefully. This requires, in particular, +that a linear model specification is adequate for the data, and that no +crucially important explanatory variables have been omitted from the +model.
In the IMR example, the estimated coefficients in Table 8.3 are interpreted as follows:

Holding levels of Control of corruption and Income inequality constant, increasing School enrolment by one percentage point decreases expected IMR by 0.139 percentage points.

Holding levels of School enrolment and Income inequality constant, increasing Control of corruption by one unit decreases expected IMR by 0.046 percentage points.

Holding levels of School enrolment and Control of corruption constant, increasing Income inequality by one unit increases expected IMR by 0.055 percentage points.

Instead of “holding constant”, we often talk about “controlling for” other variables in such statements. As before, it may be more convenient to express the interpretations in terms of other increments than one unit (e.g. ten units of the measure of Income inequality) by multiplying the coefficient by the corresponding value.
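For instance, using the coefficient of Income inequality quoted above, an increase of ten units in that measure corresponds to a change of
\[10\times 0.055=0.55\]
percentage points in expected IMR, holding School enrolment and Control of corruption constant.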
+The association between the response variable \(Y\) and a particular +explanatory variable \(X\) described by the coefficient of \(X\) in a +multiple regression model is known as a partial association between +\(X\) and \(Y\), controlling for the other explanatory variables in the +model. This will often differ from the association estimated from a +simple regression model for \(Y\) given \(X\), because of the correlations +between the control variables and \(X\) and \(Y\). In the infant mortality +example, the estimated effect of School enrolment was qualitatively +unaffected by controlling for the other variables, and decreased in +magnitude from -0.179 to -0.139.
Inference for the regression coefficients in a multiple linear model differs from that for the simple model in interpretation but not in execution. Let \(\hat{\beta}_{j}\) denote the estimated coefficient of an explanatory variable \(X_{j}\) (where \(j\) may be any of \(1,2,\dots,k\)), and let \(\hat{\text{se}}(\hat{\beta}_{j})\) denote its estimated standard error. The standard errors cannot now be calculated by hand, but they are routinely produced by computer packages and displayed as in Table 8.3. A \(t\)-test statistic for the null hypothesis discussed below is given by
\[\begin{equation}
t=\frac{\hat{\beta}_{j}}{\hat{\text{se}}(\hat{\beta}_{j})}.
\tag{8.19}
\end{equation}\]
This is identical in form to statistic ((8.13)) for the simple regression model. The corresponding null hypotheses are, however, subtly but crucially different in the two cases. In a multiple model, ((8.19)) is a test statistic for the null hypothesis
\[\begin{equation}
H_{0}:\; \beta_{j}=0, \text{ other regression coefficients are unrestricted}
\tag{8.20}
\end{equation}\]
against the alternative hypothesis
\[H_{a}:\; \beta_{j}\ne0, \text{ other regression coefficients are unrestricted}.\]
Here the statement about “unrestricted” other parameters implies that neither hypothesis makes any claims about the values of other coefficients than \(\beta_{j}\), and these are allowed to have any values. The null hypothesis is a claim about the association between \(X_{j}\) and \(Y\) when the other explanatory variables are already included in the model. In other words, ((8.19)) tests
\[\begin{aligned}
H_{0}:& & \text{There is no partial association between } X_{j} \text{ and } Y,\\
&& \text{controlling for the other explanatory variables.}\end{aligned}\]
+The sampling distribution of ((8.19)) when the null hypothesis +((8.20)) holds is a \(t\) distribution with \(n-(k+1)\) degrees of +freedom, where \(k\) is again the number of explanatory variables in the +model. The test statistic and its \(P\)-value from the \(t_{n-(k+1)}\) +distribution are shown in standard computer output, in a form similar to +Table 8.3.
+It is important to note two things about test results for multiple +regression models, such as those in Table 8.3. First, +((8.20)) implies that if the null hypothesis is not rejected, +\(X_{j}\) is not associated with \(Y\), if the other explanatory variables +are already included in the model. We would typically react to this by +removing \(X_{j}\) from the model, while keeping the other variables in +it. This is because of a general principle that models should usually be +as simple (parsimonious) as possible, and not include variables which +have no partial effect on the response variable. Second, the \(k\) tests +and \(P\)-values actually refer to \(k\) different hypotheses of the form +((8.20)), one for each explanatory variable. This raises the +question of what to do if, say, tests for two variables have large +\(P\)-values, suggesting that either of them could be removed from the +model. The appropriate reaction is to remove one of the variables +(perhaps the one with the larger \(P\)-value) rather than both at once, +and then see whether the other still remains nonsignificant (if so, it +can then also be removed). This is part of the general area of model +selection, principles and practice of which are mostly beyond the +scope of this course; some further comments on it are given in Section +8.7.
In the example shown in Table 8.3, the \(P\)-values are small for the tests for all of the coefficients. Each of the three explanatory variables thus has a significant effect on the response even after controlling for the other two, so none of the variables should be removed from the model.

A confidence interval with confidence level \(1-\alpha\) for any \(\beta_{j}\) is given by
\[\begin{equation}
\hat{\beta}_{j} \pm t_{\alpha/2}^{(n-(k+1))} \,\hat{\text{se}}(\hat{\beta}_{j}).
\tag{8.21}
\end{equation}\]
This is identical in form and interpretation to the interval ((8.14)) for simple regression (except that the degrees of freedom are now \(df=n-(k+1)\)), so no new issues arise. The confidence intervals for the coefficients in our example (where \(df=n-4=107\) and \(t_{0.025}^{(107)}=1.98\)) are shown in Table 8.3.
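Although these quantities are routinely produced by software such as SPSS, it may help to see how they fit together computationally. The following is a minimal sketch in Python rather than the SPSS used on this course; the estimate and standard error fed to it are placeholder values, not numbers taken from Table 8.3.

```python
from scipy import stats

n, k = 111, 3                    # sample size and number of explanatory variables
df = n - (k + 1)                 # 107 degrees of freedom, as quoted in the text

beta_hat, se = -0.14, 0.02       # placeholder estimate and standard error, for illustration only
t_stat = beta_hat / se                                   # test statistic (8.19)
p_value = 2 * stats.t.sf(abs(t_stat), df)                # two-sided P-value from the t distribution

t_crit = stats.t.ppf(0.975, df)                          # about 1.98, the multiplier quoted above
ci = (beta_hat - t_crit * se, beta_hat + t_crit * se)    # 95% confidence interval (8.21)
```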
+Our models for Infant mortality rate so far did not include some more +basic characteristics of the countries than school enrolment, corruption +and income inequality. In particular, it seems desirable to control for +the wealth of a country, which is likely to be correlated with both a +health outcome like infant mortality and the other measures of +development used as explanatory variables. We will do this by adding to +the model the income level of the country, classified in the Global +Civil Society Yearbook into three groups as Low, Middle or High income. +Here one reason for considering income as a categorical variable is +obviously to obtain an illustrative example for this section. However, +using a variable like income in a grouped form is also more generally +common practice. It also has the advantage that it is one way of dealing +with cases where the effects on \(Y\) of the corresponding continuous +explanatory variable may be nonlinear.
+Summary statistics in Table 8.2 show that income group is +associated with both IMR and the explanatory variables considered so +far: countries with higher income tend to have lower levels of infant +mortality, and higher school enrolment, less corruption and less income +inequality than lower-income countries. It thus seems that controlling +for income is potentially necessary, and may change the conclusions from +the model.
+Trying to add income level to the model confronts us with a new problem: +how can a categorical explanatory variable like this be used in linear +regression? This question is not limited to the current example, but is +unavoidable in the social sciences. Even just the standard background +variables such as sex, marital status, education level, party +preference, employment status and region of residence considered in most +individual-level studies are mostly categorical. Similarly, most survey +data on attitudes and behaviour are collected in a categorical form, and +even variables such as age or income which are originally continuous are +often used in a grouped form. Categorical variables are thus ubiquitous +in the social sciences, and it is essential to be able to use them also +in regression models. How this is done is explained in this section, +again illustrated with the infant mortality example. Section +8.6.2 then describes a different example +for further illustration of the techniques.
The key to handling categorical explanatory variables is the use of dummy variables. A dummy variable (or indicator variable) is a variable with only two values, 0 and 1. Its value is 1 if a unit is in a particular category of a categorical variable, and 0 if it is not in that category. For example, we can define for each country the variable
\[D_{m}=\begin{cases}
1 & \text{if Income level is ``Middle''} \\
0 & \text{otherwise.}
\end{cases}\]
This would typically be referred to as something like “dummy for middle income level”. Note that the label \(D_{m}\) used here has no special significance, and was chosen simply to make it easy to remember. Dummy variables will be treated below as regular explanatory variables, and we could denote them as \(X\)s just like all the others. A dummy for high income level is defined similarly as
\[D_{h}=\begin{cases}
1 & \text{if Income level is ``High''} \\
0 & \text{otherwise.}
\end{cases}\]
The two variables \(D_{m}\) and \(D_{h}\) are enough to identify the income level of any country. If a country is in the middle-income group, the two dummies have the values \(D_{m}=1\) and \(D_{h}=0\) (as no country can be in two groups), and if it has high income, the dummies are \(D_{m}=0\) and \(D_{h}=1\). For low-income countries, both \(D_{m}=0\) and \(D_{h}=0\). There is thus no need to define a dummy for low income, because this category is identified by the two other dummies being both zero. The same is true in general: if a categorical variable has \(K\) categories, only \(K-1\) dummy variables are needed to identify the category of every unit. Note, in particular, that dichotomous variables with only two categories (\(K=2\)) are fully identified by just one dummy variable. The category which is not given a dummy of its own is known as the reference category or baseline category. Any category can be the baseline, and this is usually chosen in a way to make interpretation (discussed below) convenient. The results of the model will be the same, whichever baseline is used.
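To make the construction concrete, here is a small sketch of how the two dummies could be created from a three-category income variable. It uses Python rather than the SPSS used on this course, and the toy data are invented for illustration.

```python
import pandas as pd

# A toy income-level variable for five hypothetical countries
income = pd.Series(["Low", "Middle", "High", "Middle", "Low"])

d_m = (income == "Middle").astype(int)   # dummy for middle income: 1 if "Middle", else 0
d_h = (income == "High").astype(int)     # dummy for high income: 1 if "High", else 0
# Low income is the reference category, identified by d_m = 0 and d_h = 0,
# so no third dummy is needed.
```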
Categorical variables are used as explanatory variables in regression models by including the dummy variables for them in the model. The results for this in our example are shown in Table 8.4. This requires no changes in the definition or estimation of the model, and the parameter estimates, standard errors and quantities for statistical inference are obtained exactly as before even when some of the explanatory variables are dummy variables. The only aspect which requires some further explanation is the interpretation of the coefficients of the dummy variables.
| Explanatory variable | Coefficient | Std. error | \(t\) | \(P\)-value | 95 % Conf. interval |
|---|---|---|---|---|---|
| Constant | 12.00 | | | | |
| School enrolment (%) | \(-0.091\) | 0.016 | \(-5.69\) | \(<0.001\) | \((-0.123; -0.059)\) |
| Control of corruption | \(-0.020\) | 0.011 | \(-1.75\) | 0.083 | \((-0.043; 0.003)\) |
| Income inequality | 0.080 | 0.021 | 3.75 | \(<0.001\) | (0.038; 0.122) |
| Income level (Reference group: Low) | | | | | |
| Middle | \(-3.210\) | 0.631 | \(-5.09\) | \(<0.001\) | \((-4.461; -1.958)\) |
| High | \(-3.296\) | 1.039 | \(-3.17\) | 0.002 | \((-5.357; -1.235)\) |
Recall that the regression coefficient of a continuous explanatory +variable \(X\) is the expected change in the response variable when \(X\) is +increased by one unit, holding all other explanatory variables constant. +Exactly the same interpretation works for dummy variables, except that +it is limited to the only one-unit increase possible for them, i.e. from +0 to 1. For example, consider two (hypothetical) countries with values 0 +and 1 for the dummy \(D_{m}\) for middle income, but with the same values +for the three continuous explanatory variables. How about the other +dummy variable \(D_{h}\), for high income? The interpretation requires +that this too is held constant in the comparison. If this constant value +was 1, it would not be possible for \(D_{m}\) to be 1 because every +country is in only one income group. Thus the only value at which +\(D_{h}\) can be held constant while \(D_{m}\) is changed is 0, so that the +comparison will be between a country with \(D_{m}=1, \, D_{h}=0\) and one +with \(D_{m}=0,\, D_{h}=0\), both with the same values of the other +explanatory variables. In other words, the interpretation of the +coefficient of \(D_{m}\) refers to a comparison in expected value of \(Y\) +between a middle-income country and a country in the baseline category +of low income, controlling for the other explanatory variables. The same +applies to the coefficient of \(D_{h}\), and of dummy variables in +general:
+Here the estimated coefficient of \(D_{m}\) is \(-3.21\). In other words, +comparing a middle-income country and a low-income country, both with +the same levels of School enrolment, Control of corruption and Income +inequality, the expected IMR is 3.21 percentage points lower in the +middle-income country than in the low-income one. Similarly, a +high-income country has an expected IMR 3.296 percentage points lower +than a low-income one, other things being equal. The expected difference +between the two non-reference levels is obtained as the difference of +their coefficients (or by making one of them the reference level, as +discussed below); here \(-3.296-(-3.210)=-0.086\), so a high-income +country has an expected IMR 0.086 percentage points lower than a +middle-income one, controlling for the other explanatory variables.
Predicted values are again obtained by substituting values for the explanatory variables, including appropriate zeros and ones for the dummy variables, into the estimated regression equation. For example, the predicted IMR for a country with School enrolment of 86 %, Control of corruption score of 50 and Income inequality of 40 is
\[\begin{aligned}
\hat{Y}&=&12.0-0.091\times 86-0.020\times 50+0.080\times 40-3.210\times 0-3.296\times 0\\
&=& 6.37 \text{ for a low-income country, and }\\
\hat{Y}&=&12.0-0.091\times 86-0.020\times 50+0.080\times 40-3.210\times 1-3.296\times 0\\
&=& 6.37-3.21=3.16 \text{ for a middle-income country,}\end{aligned}\]
with a difference of 3.21, as expected. Note how the constant term 12.0 sets the level for the baseline (low-income) group, and the coefficient \(-3.21\) shows the change from that level when considering a middle-income country instead. Note also that we should again avoid unrealistic combinations of the variables in such predictions. For example, the above values would not be appropriate for high-income countries, because there are no such countries in these data with Control of corruption as low as 50.
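The same calculation can be written as a small function. The sketch below simply hard-codes the estimated coefficients from Table 8.4 (it does not re-fit the model) and reproduces the two predicted values computed above.

```python
def predicted_imr(enrolment, corruption, inequality, middle, high):
    """Predicted IMR from the fitted model of Table 8.4; 'middle' and 'high' are the income dummies."""
    return (12.0 - 0.091 * enrolment - 0.020 * corruption
            + 0.080 * inequality - 3.210 * middle - 3.296 * high)

predicted_imr(86, 50, 40, middle=0, high=0)   # about 6.37 for a low-income country
predicted_imr(86, 50, 40, middle=1, high=0)   # about 3.16 for a middle-income country
```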
+The choice of the reference category does not affect the fitted model, +and exactly the same results are obtained with any choice. For example, +if high income is used as the reference category instead, the +coefficients of the three continuous variables are unchanged from Table +8.4, and the coefficients of the dummy variables for low and +middle incomes are 3.296 and 0.086 respectively. The conclusions from +these are the same as above: controlling for the other explanatory +variables, the difference in expected IMR is 3.296 between low and +high-income, 0.086 between middle and high-income and +\(3.296-0.086=3.210\) between low and middle-income countries. Because the +choice is arbitrary, the baseline level should be selected in whichever +way is most convenient for stating the interpretation. If the +categorical variable is ordinal (as it is here), it makes most sense for +the baseline to be the first or last category. In other ways the +dummy-variable treatment makes no distinction between nominal and +ordinal categorical variables. Both are treated effectively as nominal +in fitting the model, and information on any ordering of the categories +is ignored.
Significance tests and confidence intervals are obtained for coefficients of dummy variables exactly as for any regression coefficients. Since the coefficient is in this case interpreted as an expected difference between a level of a categorical variable and the reference level, the null hypothesis of a zero coefficient is the hypothesis that there is no such difference. For example, Table 8.4 shows that the coefficients of both the middle income and high income dummies are clearly significantly different from zero. This shows that, controlling for the other explanatory variables, expected infant mortality for both middle and high-income countries is different from that in low-income countries. The 95% confidence intervals in Table 8.4 are intervals for this difference.
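As a check on how these quantities fit together, the \(t\)-statistic for the middle income dummy in Table 8.4 is obtained from ((8.19)) as
\[t=\frac{-3.210}{0.631}\approx -5.09,\]
and its 95% confidence interval from ((8.21)) as
\[-3.210\pm 1.98\times 0.631 \approx (-4.46;\; -1.96),\]
which agree, up to rounding, with the entries shown in the table (with five explanatory variables the degrees of freedom are \(n-6\), for which the multiplier is again approximately 1.98).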
+On the other hand, the coefficients of the two higher groups are very +similar, which suggests that they may not be different from each other. +This can be confirmed by fitting the same model with high income as the +reference level, including dummies for low and middle groups. In this +model (not shown here), the coefficient of the middle income dummy +corresponds to the difference of the Middle and High groups. Its +\(P\)-value is 0.907 and 95% confidence interval \((-1.37; 1.54)\), so the +difference is clearly not significant. This suggests that we could +simplify the model further by combining the two higher groups and +considering only two income groups, low vs. middle/high.
+In cases like this where a categorical explanatory variable has more +than two categories, \(t\)-tests of individual coefficients are tests of +hypotheses about no differences between individual categories, not the +hypothesis that the variable has no effect overall. This is the +hypothesis that the coefficients of the dummies for all of the +categories are zero. This requires a slightly different test, which will +not be considered here. In our example the low income category is so +obviously different from the other two that it is clear that the +hypothesis of no overall income effect would be rejected.
+The main reason for including income group in the example was not to +study income effects themselves (it is after all not all that surprising +that infant mortality is highest in poor countries), but to control for +them when examining partial associations between IMR and the other +explanatory variables. These describe the estimated effects of these +continuous variables when comparing countries with similar income +levels. Comparing the results in Tables 8.3 and +8.4, it can be seen that the effect of School enrolment +remains significant and negative (with higher enrolment associated with +lower mortality), although its magnitude decreases somewhat after +controlling for income group. Some but not all of its estimated effect +in the first model is thus explained away by the fact that income is +associated with both primary school enrolment and infant mortality, with +richer countries having both higher enrolment and lower mortality.
+The effect of Income inequality also remains significant in the larger +model, even with a slightly increased coefficient. Countries with larger +income inequality tend to have higher levels of infant mortality, even +when we compare countries with similar levels of income. The effect of +Control of corruption, on the other hand, is no longer significant in +Table 8.4. This variable is strongly associated with income +(as seen in Table 8.2), with the more corrupt countries +typically being poor. Controlling for income, however, level of +corruption appears to have little further effect on infant mortality. +This also suggests that we might simplify the model by omitting the +corruption variable.
One final remark on dummy variables establishes a connection to the techniques discussed in Chapter 7. There we described statistical inference for comparisons of the population means of a continuous response variable \(Y\) between two groups, denoted 1 and 2. Suppose now that we fit a simple linear regression model for \(Y\), with a dummy variable for group 2 as the only explanatory variable. This gives exactly the same results as the two-sample \(t\)-tests and confidence intervals (under the assumption of equal variances in the groups) in Section 7.3. In the notation of that section, the coefficients from the model are \(\hat{\alpha}=\bar{Y}_{1}\) and \(\hat{\beta}=\bar{Y}_{2}-\bar{Y}_{1}\), and \(\hat{\sigma}\) from ((8.7)) is equal to the pooled estimate of the common standard deviation used in Section 7.3.2. Similarly, the standard error ((8.12)) is the same as \(\hat{\sigma}_{\hat{\Delta}}\) in the standard error formula of Section 7.3.2, and the test statistic ((8.13)) and confidence interval ((8.14)) are identical with the \(t\)-test statistic of Section 7.3.2 and the \(t\)-distribution version of the confidence interval of Section 7.3.3 respectively.
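This equivalence is easy to verify numerically. The sketch below uses Python with artificial data (neither the data nor the package are part of this course): it fits a regression on a group dummy and runs the pooled two-sample \(t\)-test, and the two \(t\) statistics coincide.

```python
import numpy as np
import statsmodels.api as sm
from scipy import stats

rng = np.random.default_rng(451)
y1 = rng.normal(10, 2, size=30)            # artificial responses for group 1
y2 = rng.normal(12, 2, size=40)            # artificial responses for group 2

y = np.concatenate([y1, y2])
d = np.concatenate([np.zeros(30), np.ones(40)])   # dummy variable for group 2

fit = sm.OLS(y, sm.add_constant(d)).fit()
t_regression = fit.tvalues[1]                               # t for the dummy's coefficient
t_two_sample, _ = stats.ttest_ind(y2, y1, equal_var=True)   # pooled two-sample t-test
# t_regression and t_two_sample are equal, and fit.params gives the group-1 mean
# and the difference of the two group means.
```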
+The connection between linear regression and the two-sample \(t\)-test is +an illustration of how statistical methods are not a series of separate +tricks for different situations, but a set of connected procedures +unified by common principles. Whenever possible, methods for more +complex problems have been created by extending those for simpler ones, +and simple analyses are in turn special cases of more general ones. +Although these connections are unfortunately often somewhat obscured by +changes in language and notation, trying to understand them is very +useful for effective learning of statistics.
+Because the analysis of the models for infant mortality was presented +piecemeal to accompany the introduction of different elements of linear +regression, an overall picture of that example may have been difficult +to discern. This section describes a different analysis in a more +concise manner. It is particularly an illustration of the use of dummy +variables, as most of the explanatory variables are categorical. The +example concerns the relationship between minimum wage and employment, +and uses data originally collected and analysed by David Card and Alan +Krueger.53 Most of the choices of analyses and variables considered +here are based on those of Card and Krueger. Their article should be +consulted for discussion and more detailed analyses.
+A minimum wage of $5.05 per hour came into effect in the U.S. state of +New Jersey on April 1 1992. This represented an increase from the +previous, federally mandated minimum wage of $4.25 per hour. +Conventional economic theory predicts that employers will react to such +an increase by cutting their work force. Here the research hypothesis is +thus that employment will be reduced among businesses affected by the +new minimum wage. This can be addressed using suitable data, examining a +statistical hypothesis of no association between measures of minimum +wage increase and change of employment, controlling for other relevant +explanatory variables.
+Card and Krueger collected data for 410 fast food restaurants at two +times, about one month before and eight months after the mininum wage +increase came into effect in New Jersey. Only the 368 restaurants with +known values of all the variables used in the analyses are included +here. Of them, 268 were in New Jersey and had starting wages below +$5.05 at the time of the first interview, so that these had to be +increased to meet the new minimum wage. The remaining 100 restaurants +provide a control group which was not affected by the change: 75 of them +were in neighbouring eastern Pennsylvania where the minimum wage +remained at $4.25, and 25 were in New Jersey but had starting wages of +at least $5.05 even before the increase. The theoretical prediction is +that the control group should experience a smaller negative employment +change than the restaurants affected by the wage increase, +i.e. employment in the control restaurants should not decrease or at +least decrease less than in the affected restaurants. Card and Krueger +argue that fast food restaurants provide a good population for examining +the research question, because they employ many low-wage workers, +generally comply with minimum wage legislation, do not receive tips +which would complicate wage calculations, and are relatively easy to +sample and interview.
+The response variable considered here is the change between the two +interviews in full-time equivalent employment at the restaurant, defined +as the number of full-time workers (including managers) plus half the +number of part-time workers. This will be called “Employment change” +below. We consider two variables which indicate how the restaurant was +affected by the minimum wage increase. The first is simply a dummy +variable which is 1 for those New Jersey restaurants where wages needed +to be raised because of the increase, and 0 for the other restaurants. +These will be referred to as “Affected” and “Unaffected” restaurants +respectively. The second variable is also 0 for the unaffected +restaurants; for the affected ones, it is the proportion by which their +previous starting wage had to be increased to meet the new minimum wage. +For example, if the previous starting wage was the old minimum of +$4.25, this “Wage gap” is \((5.05-4.25)/4.25=0.188\). Finally, we will +also use information on the chain the restaurant belongs to (Burger +King, Roy Rogers, Wendy’s or KFC) and whether it is owned by the parent +company or the franchisee. These will be included in the analyses as +partial control for other factors affecting the labour market, which +might have had a differential impact on different types of restaurants +over the study period. Summary statistics for the variables are shown in +Table 8.5.
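As a small illustration of the two derived variables described above, the following sketch (in Python; the numbers fed to it are invented, not taken from the Card and Krueger data) computes full-time equivalent employment and the wage gap.

```python
def fte_employment(full_time, part_time):
    """Full-time equivalent employment: full-time workers (incl. managers) plus half the part-timers."""
    return full_time + 0.5 * part_time

def wage_gap(starting_wage, affected, new_minimum=5.05):
    """Proportional rise needed to reach the new minimum; defined as 0 for unaffected restaurants."""
    if not affected:
        return 0.0
    return (new_minimum - starting_wage) / starting_wage

fte_employment(20, 10)            # 25.0 full-time equivalent employees
wage_gap(4.25, affected=True)     # about 0.188, the example given in the text
```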
| Group | % | \((n)\) | Affected % \((n)\) | Wage gap (mean for affected restaurants) | Employment change (mean) |
|---|---|---|---|---|---|
| Overall | 100 | (368) | 72.8 (268) | 0.115 | \(-0.30\) |
| Ownership | | | | | |
| Franchisee | 64.7 | (238) | 71.8 (171) | 0.122 | \(-0.17\) |
| Company | 35.3 | (130) | 74.6 (97) | 0.103 | \(-0.52\) |
| Chain | | | | | |
| Burger King | 41.0 | (151) | 73.5 (111) | 0.129 | \(+0.02\) |
| Roy Rogers | 24.7 | (91) | 72.5 (66) | 0.104 | \(-1.89\) |
| Wendy’s | 13.0 | (48) | 60.4 (29) | 0.086 | \(-0.28\) |
| KFC | 21.2 | (78) | 79.5 (62) | 0.117 | \(+0.94\) |

:(#tab:t-fastfood-descr)Summary statistics for the variables considered in the minimum wage example of Section 8.6.2. “Affected” and “Wage gap” are the minimum-wage variables; “Employment change” is the response variable. Mean employment change: Among unaffected restaurants: \(-2.93\); Among affected restaurants: \(+0.68\).
| Variable | Model (1): Coefficient (std error) | \(t\) (\(P\)-value) | Model (2): Coefficient (std error) | \(t\) (\(P\)-value) |
|---|---|---|---|---|
| Constant | \(-2.63\) | | \(-1.54\) | |
| Affected by the increase | 3.56 (1.02) | 3.50 (0.001) | — | — |
| Wage gap | — | — | 15.88 (6.04) | 2.63 (0.009) |
| Ownership (vs. Franchisee): Company | 0.22 (1.06) | 0.20 (0.84) | 0.43 (1.07) | 0.40 (0.69) |
| Chain (vs. Burger King): Roy Rogers | \(-2.00\) (1.28) | \(-1.56\) (0.12) | \(-1.84\) (1.29) | \(-1.43\) (0.15) |
| Wendy’s | 0.15 (1.44) | 0.11 (0.92) | 0.36 (1.46) | 0.24 (0.81) |
| KFC | 0.64 (1.25) | 0.51 (0.61) | 0.81 (1.26) | 0.64 (0.52) |
| \(R^{2}\) | 0.046 | | 0.032 | |

: (#tab:t-fastfood-models)Response variable: Change in Full-time equivalent employment. Two fitted models for Employment change given exposure to minimum wage increase and control variables. See the text for further details.
+Table 8.6 shows results for two linear regression +models for Employment change, one using the dummy for affected +restaurants and one using Wage gap as an explanatory variable. Both +include the same dummy variables for ownership and chain of the +restaurant. Consider first the model in column (1), which includes the +dummy variable for affected restaurants. The estimated coefficient of +this is 3.56, which is statistically significant (with \(t=3.50\) and +\(P=0.001\)). This means that the estimated expected Employment change for +the restaurants affected by the minimum wage increase was 3.56 full-time +equivalent employees larger (in the positive direction) than for +unaffected restaurants, controlling for the chain and type of ownership +of the restaurant. This is the opposite of the theoretical prediction +that the difference would be negative, due to the minimum wage increase +leading to reductions of work force among affected restaurants but +little change for the unaffected ones. In fact, the summary statistics +in Table 8.5 show (although without controlling for +chain and ownership) that average employment actually increased in +absolute terms among the affected restaurants, but decreased among the +unaffected ones.
+The coefficients of the control variables in Table +8.6 describe estimated differences between +company-owned and franchisee-owned restaurants, and between Burger Kings +and restaurants of other chains, controlling for the other variables in +the model. All of these coefficients have high \(P\)-values for both +models, suggesting that the differences are small. In fact, the only one +which is borderline significant, after all the other control dummies are +successively removed from the model (not shown here), is that Employment +change seems to have been more negative for Roy Rogers restaurants than +for the rest. This side issue is not investigated in detail here. In any +case, the control variables have little influence on the effect of the +variable of main interest: if all the control dummies are removed from +Model (1), the coefficient of the dummy variable for affected +restaurants becomes 3.61 with a standard error of 1.01, little changed +from the estimates in Table 8.6. This is not +entirely surprising, as the control variables are weakly associated with +the variable of interest: as seen in Table 8.5, the +proportions of affected restaurants are mostly fairly similar among +restaurants of all chains and types of ownership.
In their article, Card and Krueger carefully explore (and confirm) the robustness of their findings by considering a series of variants of the analysis, with different choices of variables and sets of observations. This is done to try to rule out the possibility that the main conclusions are reliant on, and possibly biased by, some specific features of the data and variables in the initial analysis, such as missing data or measurement error. Such sensitivity analyses would be desirable in most social science contexts, where a single, definitively best form of analysis is rarely obvious. Here we will carry out a modest version of such an assessment by considering the Wage gap variable as an alternative measure of the impact of the minimum wage increase, instead of a dummy for affected restaurants. This is a continuous variable, but one whose values are 0 for all unaffected restaurants and vary only among the affected ones. The logic of using Wage gap as an explanatory variable here is that Employment change could be expected to depend not only on whether a restaurant had to increase its starting wage to meet the new minimum wage, but also on how large that compulsory increase was.
+The results for the second analysis are shown as Model (2) in Table +8.6. The results are qualitatively the same as for +Model (1), in that the coefficients of the control dummies are not +significant, and that of Wage gap (which is 15.88) is significant and +positive. The estimated employment change is thus again larger for +affected than for unaffected restaurants, and their difference now even +increases when the wage rise required from an affected restaurant +increases. To compare these results more directly to Model (1), we can +consider a comparison between an unaffected restaurant (with Wage gap 0) +and an affected one with Wage gap equal to its mean among the affected +restaurants, which is 0.115 (c.f. Table 8.5). The +estimated difference in Employment change between them, controlling for +ownership and chain, is \(0.115\times 15.88=1.83\), which is somewhat +lower than the 3.56 estimated from model (1).
+This example is also a good illustration of the limitations of the +\(R^{2}\) statistic. The \(R^{2}\) values of 0.046 and 0.032 are very small, +so over 95% of the observed variation in employment changes remains +unexplained by the variation in the three explanatory variables. In +other words, there are large differences in Employment change +experienced even by affected or unaffected restaurants of the same chain +and type of ownership. This would make predicting the employment +change for a particular restaurant a fairly hopeless task with these +explanatory variables. However, prediction is not the point here. The +research question focuses on possible differences in average changes +in employment, and finding such differences is of interest even if +variation around the averages is large.
+In summary, the analysis provides no support for the theoretical +prediction that the restaurants affected by a minimum wage increase +would experience a larger negative job change than control restaurants. +In fact, there was a small but significant difference in the opposite +direction in both models described here, and in all of the analyses +considered by Card and Krueger. The authors propose a number of +tentative explanations for this finding, but do not present any of them +as definitive.
+The material in this chapter provides a reasonably self-contained +introduction to linear regression models. However, it is not possible +for a course like this to cover comprehensively all aspects of the +models, so some topics have been described rather superficially and +several have been omitted altogether. In this section we briefly discuss +some of them. First, three previously unexplained small items in +standard SPSS output are described. Second, a list of further topics in +linear regression is given.
An example of SPSS output for linear regression models was given in Figure 8.7. Most parts of it have been explained above, but three have not been mentioned. These can be safely ignored, because each is of minor importance in most analyses. However, it is worth giving a brief explanation so as not to leave these three as mysteries:
“Adjusted R Square” in the “Model Summary” table is a statistic defined as \(R^{2}_{adj}=[(n-1)R^{2}-k]/(n-k-1)\). This is most relevant in situations where the main purpose of the model is prediction of future observations of \(Y\). The population value of the \(R^{2}\) statistic is then a key criterion of model selection. \(R^{2}_{adj}\) is a better estimate of it than standard \(R^{2}\). Unlike \(R^{2}\), \(R^{2}_{adj}\) does not always increase when new explanatory variables are added to the model. As a sample statistic, \(R^{2}_{adj}\) does not have the same interpretation as the proportion of variation of \(Y\) explained as standard \(R^{2}\). (A small computational sketch of this statistic, and of the standardized coefficients discussed below, is given after this list.)
The last two columns of the “ANOVA” (Analysis of Variance) table +show the test statistic and \(P\)-value for the so-called +\(F\)-test.54 The null hypothesis for this is that the regression +coefficients of all the explanatory variables are zero, +i.e. \(\beta_{1}=\beta_{2}=\dots=\beta_{k}=0\). In the case of simple +regression (\(k=1\)), this is equivalent to the \(t\)-test for +\(\beta=0\). In multiple regression, it implies that none of the +explanatory variables have an effect on the response variable. In +practice, this is rejected in most applications. Rejecting the +hypothesis implies that at least one of the explanatory variables is +associated with the response, but the test provides no help for +identifying which of the individual partial effects +are significant. The \(F\)-test is thus usually largely irrelevant. +More useful is an extended version of it (which is not included in +the default output), which is used for hypotheses that a set of +several regression coefficients (but not all of them) is zero. For +example, this could be used in the example of Table 8.4 +to test if income level had no effect on IMR, i.e. if the +coefficients of the dummies for both middle and high income +were zero.
The “Standardized Coefficients/Beta” in the “Coefficients” table +are defined as \((s_{xj}/s_{y})\hat{\beta}_{j}\), where +\(\hat{\beta}_{j}\) is the estimated coefficient of \(X_{j}\), and +\(s_{xj}\) and \(s_{y}\) are sample standard deviations of \(X_{j}\) and +\(Y\) respectively. This is equal to the correlation of \(Y\) and +\(X_{j}\) when \(X_{j}\) is the only explanatory variable in the model, +but not otherwise. The standardized coefficient describes the +expected change in \(Y\) in units of its sample standard error, when +\(X_{j}\) is increased by one standard error \(s_{xj}\), holding other +explanatory variables constant. The aim of this exercise is to +obtain coefficients which are more directly comparable between +different explanatory variables. Ultimately it refers to the +question of relative importance of the explanatory variables, +i.e. “Which of the explanatory variables in the model is the most +important?” This is understandably of interest in many cases, often +perhaps more so than any other aspect of the model. Unfortunately, +however, relative importance is also one of the hardest questions in +modelling, and one without a purely statistical solution. Despite +their appealing title, standardized coefficients have problems of +their own and do not provide a simple tool for judging +relative importance. For example, their values depend not only on +the strength of association described by \(\hat{\beta}_{j}\) but also +on the standard deviation \(s_{xj}\), which can be different in +different samples.
+Sensible comparisons of the magnitudes of expected changes in \(Y\) in +response to changes in individual explanatory variables can usually +be presented even without reference to standardized coefficients, +simply by using the usual coefficients \(\hat{\beta}_{j}\) and +carefully considering the effects of suitably chosen increments of +the explanatory variables. In general, it is also worth bearing in +mind that questions of relative importance are often conceptually +troublesome, for example between explanatory variables with very +different practical implications. For instance, suppose that we have +fitted a model for a measure of the health status of a person, given +the amount of physical exercise the person takes (which can be +changed by him/herself), investment in preventive healthcare in the +area where the person lives (which can be changed, but with more +effort and not by the individual) and the person’s age (which cannot +be manipulated at all). The values of the unstandardized or +standardized coefficients of these explanatory variables can +certainly be compared, but it is not clear what statements about the +relative sizes of the effects of “increasing” them would +really mean.
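As a small computational footnote to the first and third items above, the sketch below shows how the two statistics can be reproduced by hand from quantities that any regression output provides. It is written in Python rather than the SPSS used on this course, and the numbers passed to the functions are placeholders, not values from the tables in this chapter.

```python
def adjusted_r2(r2, n, k):
    """Adjusted R-squared, using the formula quoted above."""
    return ((n - 1) * r2 - k) / (n - k - 1)

def standardized_coef(beta_hat, s_x, s_y):
    """Standardized ('Beta') coefficient: (s_x / s_y) * beta_hat."""
    return (s_x / s_y) * beta_hat

adjusted_r2(0.70, n=111, k=3)                  # placeholder R-squared of 0.70
standardized_coef(-0.14, s_x=15.0, s_y=25.0)   # placeholder coefficient and standard deviations
```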
A further course on linear regression (e.g. first half of MY452) will typically examine the topics covered on this course in more detail, and then go on to discuss further issues. Here we will give just a list of some such topics, in no particular order:
+Model diagnostics to examine whether a particular model appears +to be adequate for the data. The residuals \(Y_{i}-\hat{Y}_{i}\) are a +key tool in this, and the most important graphical diagnostic is +simply a scatterplot of the residuals against the fitted values +\(\hat{Y}_{i}\). One task of diagnostics is to identify individual +outliers and influential observations which have a +substantial impact on the fitted model.
Modelling nonlinear effects of the explanatory variables. This is mostly done simply by including transformed values like squares \(X^{2}\) or logarithms \(\log(X)\) as explanatory variables in the model. It is sometimes also useful to transform the response variable, e.g. using \(\log(Y)\) as the response instead of \(Y\).

Including interactions between explanatory variables in the model. This is achieved simply by including products of them as explanatory variables. (A brief sketch of how such transformed and interaction terms can be specified is given after this list.)
Identifying and dealing with problems caused by extremely high +correlations between the explanatory variables, known as problems of +multicollinearity.
Model selection to identify the best sets of explanatory +variables to be used in the model. This may employ both significance +tests and other approaches.
Analysis of Variance (ANOVA) and Analysis of Covariance +(ANCOVA), which are terms used for models involving only or +mostly categorical explanatory variables, particularly in the +context of randomized experiments. Many of these models can be +fitted as standard linear regression models with appropriate use of +dummy variables, but the conventional terminology and notation for +ANOVA and ANCOVA are somewhat different from the ones used here.
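For the second and third items in the list above, here is a brief sketch of how transformed terms and interactions can be specified using the formula interface of the Python statsmodels package; the variable names and data are invented for illustration and are not part of this course's SPSS material.

```python
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
data = pd.DataFrame({"y": rng.normal(size=100),
                     "x1": rng.normal(size=100),
                     "x2": rng.uniform(1, 10, size=100)})   # invented data

quadratic   = smf.ols("y ~ x1 + I(x1 ** 2)", data=data).fit()   # squared term for a nonlinear effect of x1
log_x       = smf.ols("y ~ np.log(x2)", data=data).fit()        # log-transformed explanatory variable
interaction = smf.ols("y ~ x1 * x2", data=data).fit()           # main effects plus the x1:x2 interaction
```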
Anheier, H., Glasius, M. and Kaldor, M. (eds.) (2005). Global +Civil Society 2004/5. London: Sage. The book gives detailed +references to the indices considered here. Many thanks to Sally +Stares for providing the data in an electronic form.↩
Accessible at data.giss.nasa.gov/gistemp/. The temperatures used here are those listed in the data base under “after combining sources at same location”.↩
More specifically, the differences are between 11-year moving +averages, where each year is represented by the average of the +temperature for that year and the five years before and five after +it (except at the ends of the series, where fewer observations are +used). This is done to smooth out short-term fluctuations from the +data, so that longer-term trends become more clearly visible.↩
This discussion is obviously rather approximate. Strictly +speaking, the conditional distribution of \(Y\) given, say, \(X=65\) +refers only to units with \(X\) exactly rather than approximately +equal to 65. This, however, is difficult to illustrate using a +sample, because most values of a continuous \(X\) appear at most once +in a sample. For reasons discussed later in this chapter, the +present approximate treatment still provides a reasonable general +idea of the nature of the kinds of associations considered here.↩
This wording is commonly used for convenience even in cases where +the nature of \(X\) is such that its values can never actually be +manipulated.↩
In this particular example, a more closely linear association is +obtained by considering the logarithm of GDP as the response +variable instead of GDP itself. This approach, which is common in +dealing with skewed variables such as income, is, however, beyond +the scope of this course.↩
Galton, F. (1888). “Co-relations and their measurement, chiefly +from anthropometric data”. Proceedings of the Royal Society of +London, 45, 135–145.↩
This is slightly misleading: what actually matters in general is +that the conditional mean is a linear function of the parameters +\(\alpha\) and \(\beta\). This need not concern us at this stage.↩
Galton, F. (1886). “Regression towards mediocrity in hereditary +stature”. Journal of the Anthropological Institute, 15, +246–263. The original context is essentially the one discussed on +courses on research design as “regression toward the mean”.↩
This exact phrase apparently first appears in Box, G.E.P. (1979). +Robustness in the strategy of scientific model building. In Launer, +R.L. and Wilkinson, G.N., Robustness in Statistics, pp. 201–236.↩
This is another old idea. Different approaches to the problem of +fitting curves to observations were gradually developed by Tobias +Mayer, Rudjer Bošković and Pierre Simon Laplace from the 1750s +onwards, and the method of least squares itself was presented by +Adrien Marie Legendre in 1805.↩
It would have been more consistent with related notation used in +Chapter 7 to denote it something like +\(\hat{\sigma}_{\hat{\beta}}\), but this would later become somewhat +cumbersome.↩
Here adapted from a discussion in Agresti and Finlay, +Statistical Methods for the Social Sciences (1997).↩
At least until we adopt extended, so-called matrix notation. In +this, the least squares estimates are expressible simply as +\(\hat{\boldsymbol{\beta}}= (\mathbf{X}'\mathbf{X})^{-1}(\mathbf{X}'\mathbf{Y})\).↩
Card, D. and Krueger, A. B. (1994). Minimum wages and employment: +A case study of the fast-food industry in New Jersey and +Pennsylvania. The American Economic Review 84, 772–793.↩
The sampling distribution of this test is the \(F\) distribution. +The letter in both refers to Sir Ronald Fisher, the founder of +modern statistics.↩
So far we have discussed statistical description, which is concerned +with summarizing features of a sample of observed data. From now on, +most of the attention will be on statistical inference. As noted in +Section 1.2.3, the purpose of inference is to draw +conclusions about the characteristics of some larger population based on +what is observed in a sample. In this chapter we will first give more +careful definitions of the concepts of populations and samples, and of +the connections between them. In Section 3.5 we +then consider the idea of a population distribution, which is the target +of statistical inference. The discussion of statistical inference will +continue in Chapters 4–7 where we gradually +introduce the basic elements of inference in the contexts of different +types of analyses.
In many cases the population of interest is a particular group of real people or other units. Consider, for example, the European Social Survey (ESS) which we used in Chapter 2 (see early in Section 2.2).10 The ESS is a cross-national survey carried out biennially in around 30 European countries. It is an academically-driven social survey which is designed to measure a wide range of attitudes, beliefs and behaviour patterns among the European population, especially for the purposes of cross-national comparisons.
+The target population of ESS is explicitly stated as being “all persons +aged 15 and over resident within private households, regardless of their +nationality, citizenship, language or legal status” in each of the +participating countries. This is, once “private household” has been +defined carefully, and notwithstanding the inevitable ambiguity in that +the precise number and composition of households are constantly +changing, a well-defined, existing group. It is also a large group: in +the UK, for example, there are around 50 million such people. +Nevertheless, we have no conceptual difficulty with imagining this +collection of individuals. We will call any such population a finite +population.
+The main problem with studying a large finite population is that it is +usually not feasible to collect data on all of its members. A census +is a study where some variables are in fact measured for the entire +population. The best-known example is the Census of Population, which at +least aims to be a complete evaluation of all persons living in a +country on a particular date with respect to basic demographic data. +Similarly, we have the Census of Production, Census of Distribution etc. +For most research, however, a census is not feasible. Even when one is +attempted, it is rarely truly comprehensive. For example, all population +censuses which involve collecting the data from the people themselves +end up missing a substantial (and non-random) proportion of the +population. For most purposes a well-executed sample of the kind +described below is actually preferable to an unsuccessful census.
When a census is not possible, information on the population is obtained by observing only a subset of units from it, i.e. a sample. This is meant to be representative of the population, so that we can generalise findings from the sample to the population. To be representative in a sense appropriate for statistical inference, a sample from a finite population must be a probability sample, obtained using probability sampling: a method of sampling in which every unit in the population has a known, non-zero probability of being selected into the sample, and in which the units are selected by a random mechanism.
+Probability sampling requires first a sampling frame, essentially +one or more lists of units or collections of units which make it +possible to select and contact members of the sample. For example, the +first stage of sampling for many UK surveys uses the Postcode Address +File, a list of postal addresses in the country. A sampling design +is then created in such a way that it assigns a sampling probability +for each unit, and the sample is drawn so that each unit’s probability +of being selected into the sample is given by their sampling +probability. The selection of the specific set of units actually +included in the sample thus involves randomness, usually implemented +with the help of random number generators on computers.
The simplest form of probability sampling is simple random sampling, in which every unit in the population has the same probability of being selected into the sample.
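As a toy illustration of the idea (not of how real surveys are drawn), the following Python sketch selects a simple random sample from a small artificial sampling frame; every unit in the frame has the same chance of ending up in the sample.

```python
import random

frame = [f"unit_{i}" for i in range(1, 501)]   # an artificial sampling frame of 500 units
random.seed(3)                                 # fixed seed, only so the illustration is reproducible
sample = random.sample(frame, k=25)            # a simple random sample of size 25
```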
+This requirement of equal selection probabilities is by no means +essential. Other probability sampling methods which relax it include
+stratified sampling, where the selection probabilities are set +separately for different groups (strata) in the population, for +example separately for men and women, different ethnic groups or +people living in different regions.
cluster sampling, where the units of interest are not sampled +individually but in groups (clusters). For example, a school +survey might involve sampling entire classes and then interviewing +every pupil in each selected class.
multistage sampling, which employs a sequence of steps, often +with a combination of stratification, clustering and simple +random sampling. For example, many social surveys use a multistage +area sampling design which begins with one or more stages of +sampling areas, then households (addresses) within selected small +areas, and finally individuals within selected households.
These more complex sampling methods are in fact used for most +large-scale social surveys to improve their accuracy and/or +cost-efficiency compared to simple random sampling. For example, the UK +component of the European Social Survey uses a design of three stages: +(1) a stratified sample of postcode sectors, stratified by region, level +of deprivation, percentage of privately rented households, and +percentage of pensioners; (2) simple random sample of addresses within +the selected sectors; and (3) simple random sample of one adult from +each selected address.
+Some analyses of such data require the use of survey weights to adjust +for the fact that some units were more likely than others to end up in +the sample. The questions of how and when the weights should be used +are, however, beyond the scope of this course. Here we will omit the +weights even in examples where they might normally be used.11
+Not all sampling methods satisfy the requirements of probability +sampling. Such techniques of non-probability sampling include
+purposive sampling, where the investigator uses his or her own +“expert” judgement to select units considered to be representative +of the population. It is very difficult to do this well, and very +easy to introduce conscious or unconscious biases into +the selection. In general, it is better to leave the task to the +random processes of probability sampling.
haphazard or convenience sampling, as when a researcher simply +uses the first \(n\) passers-by who happen to be available and willing +to answer questions. One version of this is volunteer sampling, +familiar from call-in “polls” carried out by morning television +shows and newspapers on various topics of current interest. All we +learn from such exercises are the opinions of those readers or +viewers who felt strongly enough about the issue to send in their +response, but these tell us essentially nothing about the average +attitudes of the general population.
quota sampling, where interviewers are required to select a +certain number (quota) of respondents in each of a set of categories +(defined, for example, by sex, age group and income group). The +selection of specific respondents within each group is left to the +interviewer, and is usually done using some (unstated) form of +purposive or convenience sampling. Quota sampling is quite common, +especially in market research, and can sometimes give +reasonable results. However, it is easy to introduce biases in the +selection stage, and almost impossible to know whether the resulting +sample is a representative one.
A famous example of the dangers of non-probability sampling is the +survey by the Literary Digest magazine to predict the results of the +1936 U.S. presidential election. The magazine sent out about 10 million +questionnaires on post cards to potential respondents, and based its +conclusions on those that were returned. This introduced biases in at +least two ways. First, the list of those who were sent the questionnaire +was based on registers such as the subscribers to the magazine, and of +people with telephones, cars and various club memberships. In 1936 these +were mainly wealthier people who were more likely to be Republican +voters, and the typically poorer people not on the source lists had no +chance of being included. Second, only about 25% of the questionnaires +were actually returned, effectively rendering the sample into a +volunteer sample. The magazine predicted that the Republican candidate +Alf Landon would receive 57% of the vote, when in fact his Democratic +opponent F. D. Roosevelt gained an overwhelming victory with 62% of the +vote. The outcome of the election was predicted correctly by a much +smaller probability sample collected by George Gallup.
+A more recent example is the “GM Nation” public consultation exercise on +attitudes to genetically modified (GM) agricultural products, carried +out in the U.K. in 2002–3.12 This involved various activities, +including national, regional and local events where interested members +of the public were invited to take part in discussions on GM foods. At +all such events the participants also completed a questionnaire, which +was also available on the GM Nation website. In all, around 37000 people +completed the questionnaire, and around 90% of those expressed +opposition to GM foods. While the authors of the final report of the +consultation drew some attention to the unrepresentative nature of this +sample, this fact had certainly been lost by the time the results were +reported in the national newspapers as “5 to 1 against GM crops in +biggest ever public survey”. At the same time, probability samples +suggested that the British public is actually about evenly split between +supporters and opponents of GM foods.
+Even a cursory inspection of academic journals in the social sciences +will reveal that a finite population of the kind discussed above is not +always clearly defined, nor is there often any reference to probability +sampling. Instead, the study designs may for example resemble the +following two examples:
+Example: A psychological experiment
Fifty-nine undergraduate students from a large U.S. university took part in a psychological experiment, either as part of a class project or for extra credit on a psychology course.13 The participants were randomly assigned to listen to one of two songs, one with clearly violent lyrics and one with no violent content. One of the variables of interest was a measure (from a 35-item attitude scale) of state hostility (i.e. temporary hostile feelings), obtained after the participants had listened to a song, and the researchers were interested in comparing levels of hostility between the two groups.
Example: Voting in a congressional election
A political-science article considered the U.S. congressional election which took place between June 1862 and November 1863, i.e. during a crucial period in the American Civil War.14 The units of analysis were the districts in the House of Representatives. One part of the analysis examined whether the likelihood of the candidate of the Republican Party (the party of the sitting president Abraham Lincoln) being elected from a district was associated with such explanatory variables as whether the Republican was the incumbent, a measure of the quality of the other main candidate, number of military casualties for the district, and the timing of the election in the district (especially in relation to the Union armies’ changing fortunes over the period).
There is no reference here to the kinds of finite populations and probability samples discussed in Sections 3.2 and 3.3. In the experiment, the participants were a convenience sample of respondents easily available to the researcher, while in the election study the units represent (nearly) all the districts in a single (and historically unique) election. Yet both articles contain plenty of statistical inference, so the language and concepts of samples and populations are clearly being used. How is this to be justified?
In the example of the psychological experiment the subjects will clearly not be representative of a general (non-student) population in many respects, e.g. in age and education level. However, it is not really such characteristics that the study is concerned with, nor is the population of interest really a population of people. Instead, the implicit “population” being considered is that of possible values of level of hostility after a person has listened to one of the songs in the experiment. In this extended framework, these possible values include not just the levels of hostility possibly obtained for different people, but also those that a single person might have after listening to the song at different times or in different moods etc. The generalisation from the observed data in the experiment is to this hypothetical population of possible reactions.
In the political science example the population is also a hypothetical one, namely those election results that could have been obtained if something had happened differently, i.e. if different people had turned up to vote, if some voters had made different decisions, and so on (or if we considered a different election in the same conditions, although that is less realistic in this example, since other elections have not taken place in the middle of a civil war). In other words, the votes that actually took place are treated as a sample from the population of votes that could conceivably have taken place.
+In both cases the “population” is in some sense a hypothetical or +conceptual one, a population of possible realisations of events, and the +data actually observed are a sample from that population. Sometimes it +is useful to apply similar thinking even to samples from ostensibly +quite finite populations. Any such population, say the residents of a +country, is exactly fixed at one moment only, and was and will be +slightly different at any other time, or would be even now if any one of +a myriad of small events had happened slightly differently in the past. +We could thus view the finite population itself at a single moment as a +sample from a conceptual population of possible realisations. This is +known in survey literature as a superpopulation. The data actually +observed are then also a sample from the superpopulation. With this +extension, it is possible to regard almost any set of data as a sample +from some conceptual superpopulation.
+The highly hypothetical notion of a conceptual population of possible +events is clearly going to be less easy both to justify and to +understand than the concept of a large but finite population of real +subjects defined in Section 3.2. If you find the +whole idea distracting, you can focus in your mind on the more +understandable latter case, at least if you are willing to believe that +the idea of a conceptual population is also meaningful. Its main +justification is that much of the time it works, in the sense that +useful decision rules and methods of analysis are obtained based on the +idea. Most of the motivation and ideas of statistical inference are +essentially the same for both kinds of populations.
+Even when the idea of a conceptual population is invoked, questions of +representativeness of and generalisability to real, finite populations +will still need to be kept in mind in most applications. For example, +the assumption behind the psychological experiment described above is +that the findings about how hearing a violent song affects levels of +hostility are generalisable to some larger population, beyond the 59 +participants in the experiment and beyond the body of students in a +particular university. This may well be the case at least to some +extent, but it is still open to questioning. For this reason findings +from studies like this only become really convincing when they are +replicated in comparable experiments among different kinds of +participants.
+Because the kinds of populations discussed in this section are +hypothetical, there is no sense of them having a particular fixed number +of members. Instead, they are considered to be infinite in size. This +also implies (although it may not be obvious why) that we can +essentially always treat samples from such populations as if they were +obtained using simple random sampling.
+We will introduce the idea of a population distribution first for finite +populations, before extending it to infinite ones. The discussion in +this section focuses on categorical variables, because the concepts are +easiest to explain in that context; generalisations to continuous +variables are discussed in Chapter 7.
Suppose that we have drawn a sample of \(n\) units from a finite population and determined the values of some variables for them. The units that are not in the sample also possess values of the variables, even though these are not observed. We can thus easily imagine how any of the methods which were used in Chapter 2 to describe a sample could also be applied in the same way to the whole population, if only we knew all the values in it. In particular, we can, paralleling the sample distribution of a variable, define the population distribution as the set of values of the variable which appear in the population, together with the frequencies of each value.
For illustration, consider again the example introduced early in Section 2.2. The two variables there are a person’s sex and his or her attitude toward income redistribution. We have observed them for a sample of \(n=2344\) people drawn from the population of all UK residents aged 15 or over. The sample distributions are summarised by Table 2.3.
Sex | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total
---|---|---|---|---|---|---
Male | 3.84 | 10.08 | 4.56 | 4.32 | 1.20 | 24.00
 | (16.00) | (42.00) | (19.00) | (18.00) | (5.00) | (100)
 | [7.68] | [20.16] | [9.12] | [8.64] | [2.40] | [48.00]
Female | 4.16 | 13.00 | 4.68 | 3.38 | 0.78 | 26.00
 | (16.00) | (50.00) | (18.00) | (13.00) | (3.00) | (100)
 | [8.32] | [26.00] | [9.36] | [6.76] | [1.56] | [52.00]
Total | 8.00 | 23.08 | 9.24 | 7.70 | 1.98 | 50
 | (16.00) | (46.16) | (18.48) | (15.40) | (3.96) | (100)
Imagine now that the full population consisted of 50 million people, and that the values of the two variables for them were as shown in Table 3.1. The frequencies in this table describe the population distribution of the variables in this hypothetical population, with the joint distribution of sex and attitude shown by the internal cells of the table and the marginal distributions by its margins. So there are for example 3.84 million men and 4.16 million women in the population who strongly agree with the attitude statement, and 1.98 million people overall who strongly disagree with it.
+Rather than the frequencies, it is more helpful to discuss population +distributions in terms of proportions. Table 3.1 +shows two sets of them, the overall proportions in square brackets +out of the total population size, and the two rows of conditional +proportions of attitude given sex (in parentheses). Either of these can +be used to introduce the ideas of population distributions, but we focus +on the conditional proportions because they will be more convenient for +the discussion in later chapters. In this population we observe, for +example, that the conditional proportion of “Strongly disagree” given +that a person is a woman is 0.03, i.e. 3% of women strongly disagree +with the statement, while among men the corresponding conditional +proportion is 0.05.
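If you would like to verify these figures yourself, the short Python sketch below (purely illustrative; it is not part of the course, which uses SPSS) recomputes the conditional and overall proportions from the population frequencies of Table 3.1.

```python
import numpy as np

# Population frequencies from Table 3.1, in millions of people
# (rows: Male, Female; columns: Agree strongly ... Disagree strongly)
freq = np.array([
    [3.84, 10.08, 4.56, 4.32, 1.20],   # Male,   row total 24
    [4.16, 13.00, 4.68, 3.38, 0.78],   # Female, row total 26
])

row_totals = freq.sum(axis=1, keepdims=True)   # 24 and 26 million
conditional = freq / row_totals                # attitude given sex (parentheses in Table 3.1)
overall = freq / freq.sum()                    # overall proportions (square brackets, shown as percentages)

print(np.round(conditional, 2))     # e.g. 0.05 and 0.03 for "Disagree strongly" among men and women
print(np.round(100 * overall, 2))   # e.g. 7.68, matching the [7.68] shown for (Male, Agree strongly)
```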
+Instead of “proportions”, when we discuss population distributions we +will usually talk of “probabilities”. The two terms are equivalent when +the population is finite and the variables are categorical, as in Table +3.1, but the language of probabilities is more +appropriate in other cases. We can then say that Table +3.1 shows two sets of conditional probabilities +in the population, which define two conditional probability +distributions for attitude given sex.
+The notion of a probability distribution creates a conceptual connection +between population distributions and sampling from them. This is that +the probabilities of the population distribution can also be thought of +as sampling probabilities in (simple random) sampling from the +population. For example, here the conditional probability of “Strongly +disagree” among men is 0.05, while the probability of “Strongly agree” +is 0.16. The sampling interpretation of this is that if we sample a man +at random from the population, the probability is 0.05 that he strongly +disagrees and 0.16 that he strongly agrees with the attitude statement.
+The view of population distributions as probability distributions works +also in other cases than the kind that is illustrated by Table +3.1. First, it applies also for continuous +variables, where proportions of individual values are less useful (this +is discussed further in Chapter 7). Second, it is also +appropriate when the population is regarded as an infinite +superpopulation, in which case the idea of population frequencies is +not meaningful. With this device we have thus reached a formulation of a +population distribution which is flexible enough to cover all the +situations where we will need it.
+We have now introduced the first key concepts that are involved in +statistical inference:
The population, which may be regarded as finite or infinite. Distributions of variables in the population are the population distributions, which are formulated as probability distributions of the possible values of the variables.
Random samples from the population, and sample distributions of +variables in the sample.
Substantive research questions are most often questions about population +distributions. This raises the fundamental challenge of inference: what +we are interested in — the population — is not fully observed, while +what we do observe — the sample — is not of main interest for itself. +The sample is, however, what information we do have to draw on for +conclusions about the population. Here a second challenge arises: +because of random variation in the sampling, sample distributions will +not be identical to population distributions, so inference will not be +as simple as concluding that whatever is true of the sample is also true +of the population. Something cleverer is needed to weigh the evidence in +the sample, and that something is statistical inference.
The next three chapters are mostly about statistical inference. Each of them discusses a particular type of analysis and inferential and descriptive statistical methods for it. These methods are some of the most commonly used in basic statistical analyses of empirical data. In addition, we will also use them as contexts in which to introduce the general concepts of statistical inference. This will be done gradually, with each chapter both building on previous concepts and introducing new ones, as follows:
+Chapter 4: Associations in two-way contingency tables +(significance testing, sampling distributions of statistics).
Chapter 5: Single proportions and comparisons of +proportions (probability distributions, parameters, point +estimation, confidence intervals).
Chapter 7: Means of continuous variables (probability +distributions of continuous variables, and inference for +such variables).
European Social Survey (2012). ESS Round 5 (2010/2011) Technical +Report. London: Centre for Comparative Social Surveys, City +University London. See http://www.europeansocialsurvey.org for more on +the ESS.↩
For more on survey weights and the design and analysis of surveys +in general, please see MY456 (Survey Methodology) in the Lent Term.↩
For more information, see Gaskell, G. (2004). “Science policy and +society: the British debate over GM agriculture”, Current Opinion +in Biotechnology 15, 241–245.↩
Experiment 1 in Anderson, C. A., Carnagey, N. L., and Eubanks, +J. (2003). “Exposure to violent media: the effects of songs with +violent lyrics on aggressive thoughts and feelings”. Journal of +Personality and Social Psychology 84, 960–971.↩
Carson, J. L. et al. (2001). “The impact of national tides and +district-level effects on electoral outcomes: the U.S. congressional +elections of 1862–63”. American J. of Political Science 45, +887–898.↩
In this section we continue the discussion of methods of analysis for +two-way contingency tables that was begun in Section +2.4.1. We will use again the example from the +European Social Survey that was introduced early in Section 2.2. The two variables in the example are a person’s +sex and his or her attitude toward income redistribution measured as an +ordinal variable with five levels. The two-way table of these variables +in the sample is shown again for convenience in Table +4.1, including both the frequencies and the +conditional proportions for attitude given sex.
Sex | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total
---|---|---|---|---|---|---
Male | 160 | 439 | 187 | 200 | 41 | 1027
 | (0.156) | (0.428) | (0.182) | (0.195) | (0.040) | (1.0)
Female | 206 | 651 | 239 | 187 | 34 | 1317
 | (0.156) | (0.494) | (0.182) | (0.142) | (0.026) | (1.0)
Total | 366 | 1090 | 426 | 387 | 75 | 2344
 | (0.156) | (0.465) | (0.182) | (0.165) | (0.032) | (1.0)
Unlike in Section 2.4.1, we will now go beyond description of sample distributions and into statistical inference. The observed data are thus treated as a sample from a population, and we wish to draw conclusions about the population distributions of the variables. In particular, we want to examine whether the sample provides evidence that the two variables in the table are associated in the population — in the example, whether attitude depends on sex in the population. This is done using a statistical significance test known as the \(\chi^{2}\) test of independence. We will use it also as a vehicle for introducing the basic ideas of significance testing in general.
This initial explanation of significance tests will be lengthy and detailed, because it is important to gain a good understanding of these fundamental concepts from the beginning. From then on, the same ideas will be used repeatedly throughout the rest of the course, and in practically all statistical methods that you may encounter in the future. You will then be able to draw on what you will have learned in this chapter, and that learning will also be reinforced through repeated appearances of the same concepts in different contexts. It will then not be necessary to restate the basic ideas of the tools of inference in similar detail. A short summary of the \(\chi^{2}\) test considered in this chapter is given again at the end of the chapter, in Section 4.4.
+A significance test is a method of statistical inference that is +used to assess the plausibility of hypotheses about a population. A +hypothesis is a question about population distributions, formulated as a +claim about those distributions. For the test considered in this +chapter, the question is whether or not the two variables in a +contingency table are associated in the population. In the example we +want to know whether men and women have the same distribution of +attitudes towards income redistribution in the population. For +significance testing, this question is expressed as the claim “The +distribution of attitudes towards income redistribution is the same +for men and women”, to which we want to identify the correct response, +either “Yes, it is” or “No, it isn’t”.
In trying to answer such questions, we are faced with the complication that we only have information from a sample. For example, in Table 4.1 the conditional distributions of attitude are certainly not identical for men and women. According to the definition in Section 2.4.3, this shows that sex and attitude are associated in the sample. This, however, does not prove that they are also associated in the population. Because of sampling variation, the two conditional distributions are very unlikely to be exactly identical in a sample even if they are the same in the population. In other words, the hypothesis will not be exactly true in a sample even if it is true in the population.
+On the other hand, some sample values differ from the values claimed by +the hypothesis by so much that it would be difficult to explain them as +a result of sampling variation alone. For example, if we had observed a +sample where 99% of the men but only 1% of the women disagreed with the +attitude statement, it would seem obvious that this should be evidence +against the claim that the corresponding probabilities were nevertheless +equal in the population. It would certainly be stronger evidence against +such a claim than the difference of 19.5% vs. 14.2% that was actually +observed in our sample, which in turn would be stronger evidence than, +say, 19.5% vs. 19.4%. But how are we to decide where to draw the line, +i.e. when to conclude that a particular sample value is or is not +evidence against a hypothesis? The task of statistical significance +testing is to provide explicit and transparent rules for making such +decisions.
+A significance test uses a statistic calculated +from the sample data (a test statistic) which has the property that +its values will be large if the sample provides evidence against the +hypothesis that is being tested (the null hypothesis) and small +otherwise. From a description (a sampling distribution) of what kinds +of values the test statistic might have had if the null hypothesis was +actually true in the population, we derive a measure (the P-value) +that summarises in one number the strength of evidence against the null +hypothesis that the sample provides. Based on this summary, we may then +use conventional decision rules (significance levels) to make a +discrete decision about the null hypothesis about the population. This +decision will be either to fail to reject or reject the null +hypothesis, in other words to conclude that the observed data are or are +not consistent with the claim about the population stated by the null +hypothesis.
+It only remains to put these general ideas into practice by defining +precisely the steps of statistical significance tests. This is done in +the sections below. Since some of the ideas are somewhat abstract and +perhaps initially counterintuitive, we will introduce them slowly, +discussing one at a time the following basic elements of significance +tests:
+The hypotheses being tested
Assumptions of a test
Test statistics and their sampling distributions
\(P\)-values
Drawing and stating conclusions from tests
The significance test considered in this chapter is known as the +\(\boldsymbol{\chi^{2}}\) test of independence (\(\chi^{2}\) is +pronounced “chi-squared”). It is also known as “Pearson’s \(\chi^{2}\) +test”, after Karl Pearson who first proposed it in 1900.15 We use this +test to explain the elements of significance testing. These principles +are, however, not restricted to this case, but are entirely general. +This means that all of the significance tests you will learn on this +course or elsewhere have the same basic structure, and differ only in +their details.
+The technical term for the hypothesis that is tested in statistical +significance testing is the null hypothesis. It is often denoted +\(H_{0}\). The null hypothesis is a specific claim about population +distributions. The \(\chi^{2}\) test of independence concerns the +association between two categorical variables, and its null hypothesis +is that there is no such association in the population.
+In the context of this test, it is conventional to use alternative +terminology where the variables are said to be statistically +independent when there is no association between them, and +statistically dependent when they are associated. Often the word +“statistically” is omitted, and we talk simply of variables being +independent or dependent. In this language, the null hypothesis of the +\(\chi^{2}\) test of independence is that +\[\begin{equation} +H_{0}: \;\text{The variables are statistically independent in the population}. +\tag{4.1} +\end{equation}\] +In our example the null hypothesis is thus that a +person’s sex and his or her attitude toward income redistribution are +independent in the population of adults in the UK.
+The null hypothesis ((4.1)) and the \(\chi^{2}\) test itself are +symmetric in that there is no need to designate one of the variables as +explanatory and the other as the response variable. The hypothesis can, +however, also be expressed in a form which does make use of this +distinction. This links it more clearly with the definition of +associations in terms of conditional distributions. In this form, the +null hypothesis ((4.1)) can also be stated as the claim that the +conditional distributions of the response variable are the same at all +levels of the explanatory variable, i.e. in our example as +\[H_{0}: \;\text{The conditional distribution of attitude is the same for +men as for women}.\] The hypothesis could also be expressed for the +conditional distributions the other way round, i.e. here that the +distribution of sex is the same at all levels of the attitude. All three +versions of the null hypothesis mean the same thing for the purposes of +the significance test. Describing the hypothesis in particular terms is +useful purely for easy interpretation of the test and its conclusions in +specific examples.
+As well as the null hypothesis, a significance test usually involves an +alternative hypothesis, often denoted \(H_{a}\). This is in some sense +the opposite of the null hypothesis, which indicates the kinds of +observations that will be taken as evidence against \(H_{0}\). For the +\(\chi^{2}\) test of independence this is simply the logical opposite of +((4.1)), +i.e. +\[\begin{equation} +H_{a}: \;\text{The variables are not statistically independent in the population}. +\tag{4.2} +\end{equation}\] +In terms of conditional distributions, \(H_{a}\) is that +the conditional distributions of one variable given the other are not +all identical, i.e. that for at least one pair of levels of the +explanatory variable the conditional probabilities of at least one +category of the response variable are not the same.
+The word “hypothesis” appears also in research design and philosophy of +science. There a research hypothesis means a specific claim or +prediction about observable quantities, derived from subject-matter +theory. The prediction is then compared to empirical observations. If +the two are in reasonable agreement, the hypothesis and corresponding +theory gain support or corroboration; if observations disagree with +the predictions, the hypothesis is falsified and the theory must +eventually be modified or abandoned. This role of research hypotheses +is, especially in the philosophy of science originally associated with +Karl Popper, at the heart of the scientific method. A theory which does +not produce empirically falsifiable hypotheses, or fails to be modified +even if its hypotheses are convincingly falsified, cannot be considered +scientific.
Research hypotheses of this kind are closely related to the kinds of statistical hypotheses discussed above. When empirical data are quantitative, decisions about research hypotheses are in practice usually made, at least in part, as decisions about statistical hypotheses implemented through significance tests. The falsification and corroboration of research hypotheses are then paralleled by rejection and non-rejection of statistical hypotheses. The connection is not, however, entirely straightforward, as there are several differences between research hypotheses and statistical hypotheses:
+Statistical significance tests are also often used for testing +hypotheses which do not correspond to any theoretical +research hypotheses. Sometimes the purpose of the test is just to +identify those observed differences and regularities which are large +enough to deserve further discussion. Sometimes claims stated as +null hypotheses are interesting for reasons which have nothing to do +with theoretical predictions but rather with, say, normative or +policy goals.
Research hypotheses are typically stated as predictions about +theoretical concepts. Translating them into testable statistical +hypotheses requires further operationalisation of these concepts. +First, we need to decide how the concepts are to be measured. +Second, any test involves also assumptions which are imposed not by +substantive theory but by constraints of statistical methodology. +Their appropriateness for the data at hand needs to be +assessed separately.
The conceptual connection is clearest when the research hypothesis +matches the null hypothesis of a test in general form. Then the +research hypothesis remains unfalsified as long as the null +hypothesis remains not rejected, and gets falsified when the null +hypothesis is rejected. Very often, however, the statistical +hypotheses are for technical reasons defined the other way round. In +particular, for significance tests that are about associations +between variables, a research hypothesis is typically that there +is an association between particular variables, whereas the null +hypothesis is that there is no association +(i.e. “null” association). This leads to the rather confusing +situation where the research hypothesis is supported when the null +hypothesis is rejected, and possibly falsified when the null +hypothesis is not rejected.
In the following discussion we will sometimes refer to Figure +4.1, which shows SPSS output for the \(\chi^{2}\) test of +independence for the data in Table 4.1. Output for +the test is shown on the line labelled “Pearson Chi-Square”, and “N of +valid cases” gives the sample size \(n\). The other entries in the table +are output for other tests that are not discussed here, so they can be +ignored.
+When we apply any significance test, we need to be aware of its +assumptions. These are conditions on the data which are not +themselves being tested, but which need to be approximately satisfied +for the conclusions from the test to be valid. Two broad types of such +assumptions are particularly common. The first kind are assumptions +about the measurement levels and population distributions of the +variables. For the \(\chi^{2}\) test of independence these are relatively +mild. The two variables must be categorical variables. They can have any +measurement level, although in most cases this will be either nominal or +ordinal. The test makes no use of the ordering of the categories, so it +effectively treats all variables as if they were nominal.
+The second common class of assumptions are +conditions on the sample size. Many significance tests are appropriate +only if this is sufficiently large. For the \(\chi^{2}\) test, the +expected frequencies \(f_{e}\) (which will be defined below) need to be +large enough in every cell of the table. A common rule of thumb is +that the test can be safely used if all expected frequencies are at +least 5. Another, slightly more lenient rule requires only that no more +than 20% of the expected frequencies are less than 5, and that none are +less than 1. These conditions can easily be checked with the help of +SPSS output for the \(\chi^{2}\) test, as shown in Figure +4.1. This gives information on the number and proportion of +expected frequencies (referred to as “expected counts”) less than five, +and also the size of the smallest of them. In our example the smallest +expected frequency is about 33, so the sample size condition is easily +satisfied.
+When the expected frequencies do not satisfy these conditions, the +\(\chi^{2}\) test is not fully valid, and the results should be treated +with caution (the reasons for this will be discussed below). There are +alternative tests which do not rely on these large-sample assumptions, +but they are beyond the scope of this course.
+In general, the hypotheses of a test define the questions it can answer, +and its assumptions indicate the types of data it is appropriate for. +Different tests have different hypotheses and assumptions, which need to +be considered in deciding which test is appropriate for a given +analysis. We will introduce a number of different significance tests in +this coursepack, and give guidelines for choosing between them.
A test statistic is a number calculated from the sample (i.e. a statistic in the sense defined at the beginning of Section 2.6) which is used to test a null hypothesis. We will describe the calculation of the \(\chi^{2}\) test statistic step by step, using the data in Table 4.1 for illustration. All of the elements of the test statistic for this example are shown in Table 4.2. These elements are
+The observed frequencies, denoted \(f_{o}\), one for each cell of +the table. These are simply the observed cell counts (compare the +\(f_{o}\) column of Table 4.2 to the counts in +Table 4.1).
The expected frequencies \(f_{e}\), also one for each cell. These are cell counts in a hypothetical table which would show no association between the variables. In other words, they represent a table for a sample which would exactly agree with the null hypothesis of independence in the population. To explain how the expected frequencies are calculated, consider the cell in Table 4.1 for Male respondents who strongly agree with the statement. As discussed above, if the null hypothesis of independence is true in the population, then the conditional probability of strongly agreeing is the same for both men and women. This also implies that it must then be equal to the overall (marginal) probability of strongly agreeing. The sample version of this is that the proportion who strongly agree should be the same among men as among all respondents overall. This overall proportion in Table 4.1 is \(366/2344=0.156\). If this proportion applied also to the 1027 male respondents, the number of them who strongly agreed would be \[f_{e} = \left(\frac{366}{2344}\right)\times 1027 = \frac{366\times 1027}{2344}=160.4.\] Here 2344 is the total sample size, and 366 and 1027 are the marginal frequencies of strong agreers and of male respondents respectively, i.e. the two marginal totals corresponding to the cell (Male, Strongly agree). The same rule applies also in general: the expected frequency for any cell in this or any other table is calculated as the product of the row and column totals corresponding to the cell, divided by the total sample size (a second worked example of this rule is shown just after this list).
The difference \(f_{o}-f_{e}\) between observed and expected +frequencies for each cell. Since \(f_{e}\) are the cell counts in a +table which exactly agrees with the null hypothesis, the differences +indicate how closely the counts \(f_{o}\) actually observed agree with +\(H_{0}\). If the differences are small, the observed data are +consistent with the null hypothesis, whereas large differences +indicate evidence against it. The test statistic will be obtained by +aggregating information about these differences across all the cells +of the table. This cannot, however, be done by adding up the +differences themselves, because positive (\(f_{o}\) is larger than +\(f_{e}\)) and negative (\(f_{o}\) is smaller than \(f_{e}\)) differences +will always exactly cancel each other out (c.f. their sum on the +last row of Table 4.2). Instead, +we consider…
…the squared differences \((f_{o}-f_{e})^{2}\). This removes the +signs from the differences, so that the squares of positive and +negative differences which are equally far from zero will be treated +as equally strong evidence against the null hypothesis.
Dividing the squared differences by the expected frequencies, +i.e. \((f_{o}-f_{e})^{2}/f_{e}\). This is an essential but not +particularly interesting scaling exercise, which expresses the sizes +of the squared differences relative to the sizes of +\(f_{e}\) themselves.
Finally, aggregating these quantities to get the \(\chi^{2}\) test +statistic +\[\begin{equation} +\chi^{2} = \sum \frac{(f_{o}-f_{e})^{2}}{f_{e}}. +\tag{4.3} +\end{equation}\] +Here the summation sign \(\Sigma\) indicates that +\(\chi^{2}\) is obtained by adding up the quantities +\((f_{o}-f_{e})^{2}/f_{e}\) across all the cells of the table.
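As a second worked example of the rule for expected frequencies given in the list above, consider the cell (Female, Agree), which uses the row total 1317 and the column total 1090 from Table 4.1:
\[f_{e} = \frac{1090 \times 1317}{2344} = 612.4,\]
which matches the value shown for that cell in Table 4.2.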
Sex | Attitude | \(f_{o}\) | \(f_{e}\) | \(f_{o}-f_{e}\) | \((f_{o}-f_{e})^{2}\) | \((f_{o}-f_{e})^{2}/f_{e}\)
---|---|---|---|---|---|---
Male | SA | 160 | 160.4 | \(-0.4\) | 0.16 | 0.001
Male | A | 439 | 477.6 | \(-38.6\) | 1489.96 | 3.120
Male | N | 187 | 186.6 | 0.4 | 0.16 | 0.001
Male | D | 200 | 169.6 | 30.4 | 924.16 | 5.449
Male | SD | 41 | 32.9 | 8.1 | 65.61 | 1.994
Female | SA | 206 | 205.6 | 0.4 | 0.16 | 0.001
Female | A | 651 | 612.4 | 38.6 | 1489.96 | 2.433
Female | N | 239 | 239.4 | \(-0.4\) | 0.16 | 0.001
Female | D | 187 | 217.4 | \(-30.4\) | 924.16 | 4.251
Female | SD | 34 | 42.1 | \(-8.1\) | 65.61 | 1.558
 | Sum | 2344 | 2344 | 0 | 4960.1 | \(\chi^{2}=18.81\)
The calculations can be done even by hand, but we will usually leave them to a computer. The last column of Table 4.2 shows that for Table 4.1 the test statistic is \(\chi^{2}=18.81\) (which includes some rounding error; the correct value is 18.862). In the SPSS output in Figure 4.1, it is given in the “Value” column of the “Pearson Chi-Square” row.
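For readers who want to reproduce the calculation outside SPSS, the following minimal Python sketch (illustrative only, not part of the course materials) computes the expected frequencies and the \(\chi^{2}\) statistic directly from the observed counts in Table 4.1.

```python
import numpy as np

# Observed frequencies f_o from Table 4.1 (rows: Male, Female)
fo = np.array([
    [160, 439, 187, 200, 41],
    [206, 651, 239, 187, 34],
])

n = fo.sum()                          # total sample size, 2344
row = fo.sum(axis=1, keepdims=True)   # row totals: 1027, 1317
col = fo.sum(axis=0, keepdims=True)   # column totals: 366, 1090, 426, 387, 75

fe = row * col / n                    # expected frequencies under independence
chi2 = ((fo - fe) ** 2 / fe).sum()    # the chi-squared test statistic

print(round(fe.min(), 1))             # smallest expected frequency, about 32.9
print(round((fo - fe).sum(), 6))      # the differences f_o - f_e sum to (essentially) zero
print(round(chi2, 2))                 # about 18.86
```

If scipy is available, `scipy.stats.chi2_contingency(fo)` should give the same statistic, together with the degrees of freedom and the \(P\)-value that are discussed later in this chapter.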
+We now know that the value of the \(\chi^{2}\) test statistic in the +example is 18.86. But what does that mean? Why is the test statistic +defined as ((4.3)) and not in some other form? And what does the +number mean? Is 18.86 small or large, weak or strong evidence against +the null hypothesis that sex and attitude are independent in the +population?
+In general, a test statistic for any null hypothesis should satisfy two +requirements:
+The value of the test statistic should be small when evidence +against the null hypothesis is weak, and large when this evidence +is strong.
The sampling distribution of the test statistic should be known and +of convenient form when the null hypothesis is true.
Taking the first requirement first, consider the form of ((4.3)). The important part of this is the squared differences \((f_{o}-f_{e})^{2}\) for each cell of the table. Here the expected frequencies \(f_{e}\) reveal what the table would look like if the sample was in perfect agreement with the claim of independence in the population, while the observed frequencies \(f_{o}\) show what the observed table actually does look like. If \(f_{o}\) in a cell is close to \(f_{e}\), the squared difference is small and the cell contributes only a small addition to the test statistic. If \(f_{o}\) is very different from \(f_{e}\) — either much smaller or much larger than it — the squared difference and hence the cell’s contribution to the test statistic are large.
+Summing the contributions over all the cells, this implies that the +overall value of the test statistic is small when the observed +frequencies are close to the expected frequencies under the null +hypothesis, and large when at least some of the observed frequencies are +far from the expected ones. (Note also that the smallest possible value +of the statistic is 0, obtained when the observed and the expected +frequency are exactly equal in each cell.) It is thus large values of +\(\chi^{2}\) which should be regarded as evidence against the null +hypothesis, just as required by condition 1 above.
+Turning then to condition 2, we first need to explain what is meant by +“sampling distribution of the test statistic … when the null +hypothesis is true”. This is really the conceptual crux of significance +testing. Because it is both so important and relatively abstract, we +will introduce the concept of a sampling distribution in some detail, +starting with a general definition and then focusing on the case of test +statistics in general and the \(\chi^{2}\) test in particular.
The \(\chi^{2}\) test statistic ((4.3)) is a statistic as defined at the beginning of Section 2.6, that is a number calculated from data in a sample. Once we have observed a sample, the value of a statistic in that sample is known, such as the 18.862 for \(\chi^{2}\) in our example.
+However, we also realise that this value would have been different if +the sample had been different, and also that the sample could indeed +have been different because the sampling is a process that involves +randomness. For example, in the actually observed sample in Table +4.1 we had 200 men who disagreed with the statement +and 41 who strongly disagreed with it. It is easily imaginable that +another random sample of 2344 respondents from the same population could +have given us frequencies of, say, 195 and 46 for these cells instead. +If that had happened, the value of the \(\chi^{2}\) statistic would have +been 19.75 instead of 18.86. Furthermore, it also seems intuitively +plausible that not all such alternative values are equally likely for +samples from a given population. For example, it seems quite improbable +that the population from which the sample in Table +4.1 was drawn would instead produce a sample which +also had 1027 men and 1317 women but where all the men strongly +disagreed with the statement (which would yield \(\chi^{2}=2210.3\)).
+The ideas that different possible samples would give different values of +a sample statistic, and that some such values are more likely than +others, are formalised in the concept of a sampling distribution:
+To observe a sampling distribution of a statistic, we would thus need to +draw samples from the population over and over again, and calculate the +value of the statistic for each such sample, until we had a good idea of +the proportions with which different values of the statistic appeared in +the samples. This is clearly an entirely hypothetical exercise in most +real examples where we have just one sample of actual data, whereas the +number of possible samples of that size is essentially or actually +infinite. Despite this, statisticians can find out what sampling +distributions would look like, under specific assumptions about the +population. One way to do so is through mathematical derivations. +Another is a computer simulation where we use a computer program to +draw a large number of samples from an artificial population, calculate +the value of a statistic for each of them, and examine the distribution +of the statistic across these repeated samples. We will make use of both +of these approaches below.
+The sampling distribution of any statistic depends primarily on what the +population is like. For test statistics, note that requirement 2 above +mentioned only the situation where the null hypothesis is true. This is +in fact the central conceptual ingredient of significance testing. The +basic logic of drawing conclusions from such tests is that we consider +what we would expect to see if the null hypothesis was in fact true in +the population, and compare that to what was actually observed in our +sample. The null hypothesis should then be rejected if the observed data +would be surprising (i.e. unlikely) if the null hypothesis was actually +true, and not rejected if the observed data would not be surprising +under the null hypothesis.
+We have already seen that the \(\chi^{2}\) test statistic is in effect a +measure of the discrepancy between what is expected under the null +hypothesis and what is observed in the sample. All test statistics for +any hypotheses have this property in one way or another. What then +remains to be determined is exactly how surprising or otherwise the +observed data are relative to the null hypothesis. A measure of this is +derived from the sampling distribution of the test statistic under the +null hypothesis. It is the only sampling distribution that is needed +for carrying out a significance test.
+For the \(\chi^{2}\) test, we need the sampling distribution of the test +statistic ((4.3)) under the independence null hypothesis +((4.1)). To make these ideas a little more concrete, the upper +part of Table 4.3 shows the crosstabulation of +sex and attitude in our example for a finite population where the null +hypothesis holds. We can see that it does because the two conditional +distributions for attitude, among men and among women, are the same +(this is the only aspect of the distributions that matters for this +demonstration; the exact values of the probabilities are otherwise +irrelevant). These are of course hypothetical population distributions, +as we do not know the true ones. We also do not claim that this +hypothetical population is even close to the true one. The whole point +of this step of hypothesis testing is to set up a population where the +null hypothesis holds as a fixed point of comparison, to see what +samples from such a population would look like and how they compare with +the real sample that we have actually observed.
+Population (frequencies are in millions of people):
Sex | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total
---|---|---|---|---|---|---
Male | 3.744 | 11.160 | 4.368 | 3.960 | 0.768 | 24.00
 | (0.156) | (0.465) | (0.182) | (0.165) | (0.032) | (1.0)
Female | 4.056 | 12.090 | 4.732 | 4.290 | 0.832 | 26.00
 | (0.156) | (0.465) | (0.182) | (0.165) | (0.032) | (1.0)
Total | 7.800 | 23.250 | 9.100 | 8.250 | 1.600 | 50
 | (0.156) | (0.465) | (0.182) | (0.165) | (0.032) | (1.0)
Sample:
Sex | Agree strongly | Agree | Neither agree nor disagree | Disagree | Disagree strongly | Total
---|---|---|---|---|---|---
Male | 181 | 505 | 191 | 203 | 41 | 1121
 | (0.161) | (0.450) | (0.170) | (0.181) | (0.037) | (1.0)
Female | 183 | 569 | 229 | 202 | 40 | 1223
 | (0.150) | (0.465) | (0.187) | (0.165) | (0.033) | (1.0)
Total | 364 | 1074 | 420 | 405 | 81 | 2344
 | (0.155) | (0.458) | (0.179) | (0.173) | (0.035) | (1.0)
In the example we have a sample of 2344 observations, so to match that +we want to identify the sampling distribution of the \(\chi^{2}\) +statistic in random samples of size 2344 from the population like the +one in the upper part of Table 4.3. The lower +part of that table shows one such sample. Even though it comes from a +population where the two variables are independent, the same is not +exactly true in the sample: we can see that the conditional sample +distributions are not the same for men and women. The value of the +\(\chi^{2}\) test statistic for this simulated sample is 2.8445.
+Before we proceed with the discussion of the sampling distribution of +the \(\chi^{2}\) statistic, we should note that it will be a continuous +probability distribution. In other words, the number of distinct values +that the test statistic can have in different samples is so large that +their distribution is clearly effectively continuous. This is true even +though the two variables in the contingency table are themselves +categorical. The two distributions, the population distribution of the +variables and the sampling distribution of a test statistic, are quite +separate entities and need not resemble each other. We will consider the +nature of continuous probability distributions in more detail in Chapter +7. In this chapter we will discuss them relatively +superficially and only to the extent that is absolutely necessary.
Figure 4.2 shows what we observe if we do a computer simulation to draw many more samples from the population in Table 4.3. The figure shows the histogram of the values of the \(\chi^{2}\) test statistic calculated from 100,000 such samples. We can see, for example, that \(\chi^{2}\) is between 0 and 10 for most of the samples, and larger than that for only a small proportion of them. In particular, we note already that the value \(\chi^{2}=18.8\) that was actually observed in the real sample occurs very rarely if samples are drawn from a population where the null hypothesis of independence holds.
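The simulation behind a figure like this is easy to sketch. The Python code below is an illustrative version (not the exact program used to produce Figure 4.2): it draws repeated samples of size 2344 from the null population in the upper part of Table 4.3 and records the \(\chi^{2}\) statistic for each.

```python
import numpy as np

rng = np.random.default_rng(451)   # arbitrary seed, chosen here only for reproducibility

# Joint population probabilities from the upper part of Table 4.3
# (cell frequencies in millions, divided by the population size of 50 million)
pop = np.array([
    [3.744, 11.160, 4.368, 3.960, 0.768],
    [4.056, 12.090, 4.732, 4.290, 0.832],
]) / 50.0

def chi2_statistic(fo):
    """Chi-squared statistic for a two-way table of observed frequencies."""
    n = fo.sum()
    fe = np.outer(fo.sum(axis=1), fo.sum(axis=0)) / n
    return ((fo - fe) ** 2 / fe).sum()

# Draw many samples of size 2344 from this null population
stats = np.array([
    chi2_statistic(rng.multinomial(2344, pop.ravel()).reshape(pop.shape))
    for _ in range(100_000)
])

# Most simulated values lie between 0 and 10; values as large as the
# 18.86 observed in the real sample are rare under the null hypothesis
print((stats > 10).mean())      # only a small proportion of the samples
print((stats > 18.86).mean())   # roughly 0.001
```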
The form of the sampling distribution can also be derived through mathematical arguments. These show that for any two-way contingency table, the approximate sampling distribution of the \(\chi^{2}\) statistic is a member of a class of continuous probability distributions known as the \(\boldsymbol{\chi}^{2}\) distributions (the same symbol \(\chi^{2}\) is rather confusingly used to refer both to the test statistic and its sampling distribution). The \(\chi^{2}\) distributions are a family of individual distributions, each of which is identified by a number known as the degrees of freedom of the distribution. Figure 4.3 shows the probability curves of some \(\chi^{2}\) distributions (what such curves mean is explained in more detail below, and in Chapter 7). All of the distributions are skewed to the right, and the shape of a particular curve depends on its degrees of freedom. All of the curves give non-zero probabilities only for positive values of the variable on the horizontal axis, indicating that the value of a \(\chi^{2}\)-distributed variable can never be negative. This is appropriate for the \(\chi^{2}\) test statistic ((4.3)), which is also always non-negative.
+For the \(\chi^{2}\) test statistic of independence we have the following +result:
+The degrees of freedom are thus given by the number of rows in the table +minus one, multiplied by the number of columns minus one. Table +4.1, for example, has \(R=2\) rows and \(C=5\) columns, +so its degrees of freedom are \(df=(2-1)\times(5-1)=4\) (as indicated by +the “df” column of the SPSS output of Figure 4.1). Figure +4.2 shows the curve of the \(\chi^{2}\) distribution with +\(df=4\) superimposed on the histogram of the sampling distribution +obtained from the computer simulation. The two are in essentially +perfect agreement, as mathematical theory indicates they should be.
+These degrees of freedom can be given a further interpretation which +relates to the structure of the table.16 We can, however, ignore this +and treat \(df\) simply as a number which identifies the appropriate +\(\chi^{2}\) distribution to be used for the \(\chi^{2}\) test for a +particular table. Often it is convenient to use the notation +\(\chi^{2}_{df}\) to refer to a specific distribution, e.g. \(\chi^{2}_{4}\) +for the \(\chi^{2}\) distribution with 4 degrees of freedom.
+The \(\chi^{2}\) sampling distribution is “approximate” in that it is an +asymptotic approximation which is exactly correct only if the sample +size is infinite and approximately correct when it is sufficiently +large. This is the reason for the conditions for the sizes of the +expected frequencies that were discussed in Section 4.3.2. When these conditions are satisfied, the +approximation is accurate enough for all practical purposes and we use +the appropriate \(\chi^{2}\) distribution as the sampling distribution.
+In Section 4.3.4, under requirement 2 for a good test +statistic, we mentioned that its sampling distribution under the null +hypothesis should be “known” and “of convenient form”. We now know that +for the \(\chi^{2}\) test it is a \(\chi^{2}\) distribution. The “convenient +form” means that the sampling distribution should not depend on too many +specific features of the data at hand. For the \(\chi^{2}\) test, the +approximate sampling distribution depends (through the degrees of +freedom) only on the size of the table but not on the sample size or the +marginal distributions of the two variables. This is convenient in the +right way, because it means that we can use the same \(\chi^{2}\) +distribution for any table with a given number of rows and columns, as +long as the sample size is large enough for the conditions in Section 4.3.2 to be satisfied.
+The last key building block of significance testing operationalises the +comparison between the observed value of a test statistic and its +sampling distribution under the null hypothesis. In essence, it provides +a way to determine whether the test statistic in the sample should be +regarded as “large” or “not large”, and with this the measure of +evidence against the null hypothesis that is the end product of the +test:
+The relevance of the phrase “in the direction of the alternative +hypothesis” is not apparent for the \(\chi^{2}\) test, so we can ignore it +for the moment. As argued above, for this test it is large values of the +test statistic which indicate evidence against the null hypothesis of +independence, so the values that correspond to “as strong or stronger +evidence” against it are the ones that are as large or larger than the +observed statistic. Their probability is evaluated from the \(\chi^{2}\) +sampling distribution defined above.
+Figure 4.4 illustrates this calculation. It shows the +curve of the \(\chi^{2}_{4}\) distribution, which is the relevant sampling +distribution for the test for the \(2\times 5\) table in our example. +Suppose first, hypothetically, that we had actually observed the sample +in the lower part of Table 4.3, for which the +value of the test statistic is \(\chi^{2}=2.84\). The \(P\)-value of the +test for this sample would then be the probability of values of 2.84 or +larger, evaluated from the \(\chi^{2}_{4}\) distribution.
+For a probability curve like the one in Figure 4.4, areas +under the curve correspond to probabilities. For example, the area under +the whole curve from 0 to infinity is 1, because a variable which +follows the \(\chi^{2}_{4}\) distribution is certain to have one of these +values. Similarly, the probability that we need for the \(P\)-value for +\(\chi^{2}=2.84\) is the area under the curve to the right of the value +2.84, which is shown in grey in Figure 4.4. This is +\(P=0.585\).
+The test statistic for the real sample in Table 4.1 +was \(\chi^{2}=18.86\), so the \(P\)-value is the combined probability of +this and all larger values. This is also shown in Figure +4.4. However, this area is not really visible in the plot +because 18.86 is far into the tail of the distribution where the +probabilities are low. The \(P\)-value is then also low, specifically +\(P=0.0008\).
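These tail probabilities can be computed directly in most statistical software. As an illustrative check (outside SPSS), the following Python lines reproduce the two \(P\)-values just quoted.

```python
from scipy.stats import chi2

# P-value = probability of a chi-squared(4) value at least as large as the statistic
print(chi2.sf(2.84, df=4))     # about 0.585, for the simulated sample in Table 4.3
print(chi2.sf(18.862, df=4))   # about 0.0008, for the real data in Table 4.1
```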
In practice the \(P\)-value is usually calculated by a computer. In the SPSS output of Figure 4.1 it is shown in the column labelled “Asymp. Sig. (2-sided)”, which is short for “Asymptotic significance level” (you can ignore the “2-sided” for this test). The value is listed as 0.001. SPSS reports, by default, \(P\)-values rounded to three decimal places. Sometimes even the smallest of these is zero, in which case the value is displayed as “.000”. This is bad practice, as the \(P\)-value for most significance tests is never exactly zero. \(P\)-values given by SPSS as “.000” should be reported instead as “\(P<0.001\)”.
Before the widespread availability of statistical software, \(P\)-values had to be obtained approximately using tables of distributions. Since you may still see this approach described in many text books, it is briefly explained here. You may also need to use the table method in the examination, where computers are not allowed. Otherwise, however, this approach is now of little interest: if the \(P\)-value is given in the computer output, there is no need to refer to distributional tables.
All introductory statistical text books include a table of \(\chi^{2}\) distributions, although its format may vary slightly from book to book. Such a table is also included in the Appendix of this coursepack. An extract from the table is shown in Table 4.4. Each row of the table corresponds to a \(\chi^{2}\) distribution with the degrees of freedom given in the first column. The other columns show so-called “critical values” for the probability levels given on the first row. Consider, for example, the row for 4 degrees of freedom. The figure 7.78 in the column for probability level 0.100 indicates that the probability of a value of 7.78 or larger is exactly 0.100 for this distribution. The 9.49 in the next column shows that the probability of 9.49 or larger is 0.050. Another way of saying this is that if the appropriate degrees of freedom were 4, and the test statistic was 7.78, the \(P\)-value would be exactly 0.100, and if the statistic was 9.49, \(P\) would be 0.050.
+df | +0.100 | +0.050 | +0.010 | +0.001 | +
---|---|---|---|---|
1 | +2.71 | +3.84 | +6.63 | +10.83 | +
2 | +4.61 | +5.99 | +9.21 | +13.82 | +
3 | +6.25 | +7.81 | +11.34 | +16.27 | +
4 | +7.78 | +9.49 | +13.28 | +18.47 | +
… | … | … | … | …
The values in the table also provide bounds for other values that are +not shown. For instance, in the hypothetical sample in Table +4.3 we had \(\chi^{2}=2.84\), which is smaller than +7.78. This implies that the corresponding \(P\)-value must be larger than +0.100, which (of course) agrees with the precise value of \(P=0.585\) (see +also Figure 4.4). Similarly, \(\chi^{2}=18.86\) for the real +data in Table 4.1, which is larger than the 18.47 +in the “0.001” column of the table for the \(\chi^{2}_{4}\) distribution. +Thus the corresponding \(P\)-value must be smaller than 0.001, again +agreeing with the correct value of \(P=0.0008\).
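The critical values in Table 4.4 can themselves be reproduced with software. For example, this small Python sketch (again illustrative only) prints the \(df=4\) row.

```python
from scipy.stats import chi2

# Right-tail probabilities used as column headings in Table 4.4
for p in (0.100, 0.050, 0.010, 0.001):
    # chi2.ppf gives the value below which a proportion (1 - p) of the
    # chi-squared(4) distribution lies, i.e. the critical value for level p
    print(p, round(chi2.ppf(1 - p, df=4), 2))
# prints 7.78, 9.49, 13.28 and 18.47, matching the df = 4 row of Table 4.4
```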
+The \(P\)-value is the end product of any significance test, in that it is +a complete quantitative summary of the strength of evidence against the +null hypothesis provided by the data in the sample. More precisely, the +\(P\)-value indicates how likely we would be to obtain a value of the test +statistic which was as or more extreme as the value for the data, if the +null hypothesis was true. Thus the smaller the \(P\)-value, the stronger +is the evidence against the null hypothesis. For example, in our +survey example of sex and attitude toward income redistribution we +obtained \(P=0.0008\) for the \(\chi^{2}\) test of independence. This is a +small number, so it indicates strong evidence against the claim that the +distributions of attitudes are the same for men and women in the +population.
For many purposes it is quite sufficient to simply report the \(P\)-value. It is, however, quite common also to state the conclusion in the form of a more discrete decision of “rejecting” or “not rejecting” the null hypothesis. This is usually based on conventional reference levels, known as significance levels or \(\boldsymbol{\alpha}\)-levels (here \(\alpha\) is the lower-case Greek letter “alpha”). The standard significance levels are 0.10, 0.05, 0.01 and 0.001 (also known as 10%, 5%, 1% and 0.1% significance levels respectively), of which the 0.05 level is most commonly used; other values than these are rarely considered. The values of the test statistic which correspond exactly to these levels are the critical values shown in the table of the \(\chi^{2}\) distribution in Table 4.4.
+When the \(P\)-value is smaller than a conventional level of +significance (i.e. the test statistic is larger than the corresponding +critical value), it is said that the null hypothesis is rejected at +that level of significance, or that the results (i.e. evidence against +the null hypothesis) are statistically significant at that level. In +our example the \(P\)-value was smaller than 0.001. The null hypothesis is +thus “rejected at the 0.1 % level of significance”, i.e. the evidence +that the variables are not independent in the population is +“statistically significant at the 0.1% level” (as well as the 10%, 5% +and 1% levels of course, but it is enough to state only the strongest +level).
+The strict decision formulation of significance testing is much overused +and misused. It is in fact quite rare that the statistical analysis will +immediately be followed by some practical action which absolutely +requires a decision about whether to act on the basis of the null +hypothesis or the alternative hypothesis. Typically the analysis which a +test is part of aims to examine some research question, and the results +of the test simply contribute new information to add support for one or +the other side of the argument about the question. The \(P\)-value is the +key measure of the strength and direction of that evidence, so it should +always be reported. The standard significance levels used for +rejecting or not rejecting null hypotheses, on the other hand, are +merely useful conventional reference points for structuring the +reporting of test results, and their importance should not be +overemphasised. Clearly \(P\)-values of, say, 0.049 and 0.051 (i.e. ones +either side of the most common conventional significance level 0.05) +indicate very similar levels of evidence against a null hypothesis, and +acting as if one was somehow qualitatively more decisive is simply +misleading.
The final step of a significance test is describing its conclusions in a research report. This should be done with appropriate care:
The report should make clear which test was used. For example, this might be stated as something like “The \(\chi^{2}\) test of independence was used to test the null hypothesis that in the population the attitude toward income redistribution was independent of sex”. There is usually no need to give literature references for the standard tests described on this course.
The numerical value of the \(P\)-value should be reported, rounded to two or three decimal places (e.g. \(P=0.108\) or \(P=0.11\)). It can also be reported in an approximate way as, for example, “\(P<0.05\)” (or the same in symbols to save space, e.g. * for \(P<0.1\), ** for \(P<0.05\), and so on). Very small \(P\)-values can always be reported as something like “\(P<0.001\)”.
When (cautiously) discussing the results in terms of discrete decisions, the most common practice is to say that the null hypothesis was either not rejected or rejected at a given significance level. It is not acceptable to say that the null hypothesis was “accepted” as an alternative to “not rejected”. Failing to reject the hypothesis that two variables are independent in the population is not the same as proving that they actually are independent.
A common mistake is to describe the \(P\)-value as the probability that the null hypothesis is true. This is understandably tempting, as such a claim would seem more natural and convenient than the correct but convoluted interpretation of the \(P\)-value as “the probability of obtaining a test statistic as extreme as, or more extreme than, the one observed in the data if the test was repeated many times for different samples from a population where the null hypothesis was true”. Unfortunately, however, the \(P\)-value is not the probability of the null hypothesis being true. Such a probability does not in fact have any real meaning at all in the statistical framework considered here.17
The results of significance tests should be stated using the names and values of the variables involved, and not just in terms of “null” and “alternative” hypotheses. This also forces you to recall what the hypotheses actually were, so that you do not accidentally describe the result the wrong way round (e.g. that the data support a claim when they do just the opposite). There are no compulsory phrases for stating the conclusions, so it can be done in a number of ways. For example, a fairly complete and careful statement in our example would be
Other possibilities are
“The association between sex and attitude toward income redistribution in the sample is statistically significant (\(P<0.001\)).”
“The analysis suggests that there is an association between sex and attitude toward income redistribution in the population (\(P<0.001\)).”
The last version is slightly less clear than the other statements in that it relies on the reader recognizing that the inclusion of the \(P\)-value implies that the word “differs” refers to a statistical claim rather than a statement of absolute fact about the population. In many contexts it would be better to say this more explicitly.
Finally, if the null hypothesis of independence is rejected, the test should not usually be the only statistical analysis that is reported for a two-way table. Instead, we would then go on to describe how the two variables appear to be associated, using the descriptive methods discussed in Section 2.4.
We have now described the elements of a significance test in some detail. Since it is easy to lose sight of the practical steps of a test in such a lengthy discussion, they are briefly repeated here for the \(\chi^{2}\) test of independence. The test of the association between sex and attitude in the survey example is again used for illustration (a short R sketch reproducing these steps on a hypothetical table is given after the list):
Data: observations of two categorical variables, here sex and attitude towards income redistribution for \(n=2344\) respondents, presented in the two-way, \(2\times 5\) contingency Table 4.1.
Assumptions: the variables can have any measurement level, but the expected frequencies \(f_{e}\) must be large enough. A common rule of thumb is that \(f_{e}\) should be at least 5 for every cell of the table. Here the smallest expected frequency is 32.9, so the requirement is comfortably satisfied.
Hypotheses: null hypothesis \(H_{0}\) that the two variables are statistically independent (i.e. not associated) in the population, against the alternative hypothesis that they are dependent.
The test statistic: the \(\chi^{2}\) statistic \[\chi^{2} = \sum \frac{(f_{o}-f_{e})^{2}}{f_{e}}\] where \(f_{o}\) denotes observed frequencies in the cells of the table and \(f_{e}\) the corresponding expected frequencies under the null hypothesis, and the summation is over all of the cells. For Table 4.1, \(\chi^{2}=18.86\).
The sampling distribution of the test statistic when \(H_{0}\) is true: a \(\chi^{2}\) distribution with \((R-1)\times(C-1)=1\times 4=4\) degrees of freedom, where \(R\) \((=2)\) and \(C\) \((=5)\) denote the numbers of rows and columns in the table respectively.
The \(P\)-value: the probability that a randomly selected value from the \(\chi^{2}_{4}\) distribution is at least 18.86. This is \(P=0.0008\), which may also be reported as \(P<0.001\).
Conclusion: the null hypothesis of independence is strongly rejected. The \(\chi^{2}\) test indicates very strong evidence that sex and attitude towards income redistribution are associated in the population (\(P<0.001\)).
When the association is judged to be statistically significant, its nature and magnitude can be further explored using the descriptive methods for two-way tables discussed in Section 2.4.
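For completeness, the steps above can be reproduced in R. The sketch below uses a small hypothetical \(2\times 5\) table of counts, since the cell frequencies of Table 4.1 are not repeated here; only the workflow, not the numbers, is intended to carry over.

```r
# Hypothetical 2 x 5 table of counts (NOT the real data of Table 4.1)
counts <- matrix(c(120,  90,  60,  80, 150,
                   130, 100,  55,  70, 140),
                 nrow = 2, byrow = TRUE,
                 dimnames = list(sex = c("Male", "Female"),
                                 attitude = paste("Category", 1:5)))

# Expected frequencies under independence: (row total * column total) / n
fe <- outer(rowSums(counts), colSums(counts)) / sum(counts)
min(fe)  # check the rule of thumb that every expected frequency is at least 5

# Chi-squared statistic, degrees of freedom and P-value computed "by hand"
X2 <- sum((counts - fe)^2 / fe)
df <- (nrow(counts) - 1) * (ncol(counts) - 1)
pchisq(X2, df = df, lower.tail = FALSE)

# The same test in a single call
chisq.test(counts)
```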
Philosophical Magazine, Series 5, 50, 157–175. The thoroughly descriptive title of the article is “On the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling”.↩
In short, they are the smallest number of cell frequencies such that they together with the row and column marginal totals are enough to determine all the remaining cell frequencies.↩
There is an alternative framework, known as Bayesian statistics, where quantities resembling \(P\)-values can be given this interpretation. The differences between the Bayesian approach and the so-called frequentist one discussed here are practically and philosophically important and interesting, but beyond the scope of this course.↩
This course is intended for those with little or no past training in quantitative methods. The course is an intensive introduction to some of the principles and methods of statistical analysis in social research. Topics covered in MY464 include descriptive statistics, basic ideas of inference and estimation, contingency tables and an introduction to linear regression models. For those with some quantitative training the slightly more advanced course MY452 (Applied Regression Analysis) would be more appropriate, followed by other Department of Methodology and Department of Statistics courses on quantitative methods, such as MY454 (Applied Statistical Computing), MY455 (Multivariate Analysis and Measurement), MY456 (Survey Methodology), MY457 (Causal Inference for Observational and Experimental Studies), MY459 (Quantitative Text Analysis), ST416 (Multilevel Modelling), and ST442 (Longitudinal Data Analysis).
This course aims to impart a level of familiarity suitable for a moderately critical understanding of the statistical material in the journals commonly used by students in their work, and the ability to use some elementary techniques.
Lectures: a 2-hour in-person lecture every week.
Applied exercises: Each week there will be an exercise for students to complete in which the ideas covered in the lecture for that week will be applied to a real data set using the software package R/RStudio. Each exercise will have an accompanying explanatory video and a multiple-choice quiz to be completed on Moodle to check your learning.
Seminars: Students will attend a one-hour seminar each week, starting in Week 2. The seminars will go over the material covered in that week’s lecture and the corresponding applied exercise, and provide a forum for students to ask questions and discuss the material covered in the course. Seminars will be available to attend in person and online. Please consult the online timetables for the times and locations of the class groups. The allocation of students to seminars is done through LSE for You. This will be explained in the first lecture and on the Moodle page. Please contact the course administrator listed on the Moodle page if you have any issues.
Coursepack: This coursepack is the main course text. It is available to be viewed online at https://lse-methodology.github.io/MY464/. You can view the coursepack in HTML form, or download it as a PDF or ePub to view offline.
Lecture slides: Copies of the slides displayed during the lectures can be downloaded from the MY464 Moodle page.
Recommended course texts:
Alan Agresti and Christine Franklin (2013). Statistics: The Art and Science of Learning from Data (Third Ed.). Pearson.
Alan Agresti and Barbara Finlay (2013). Statistical Methods for the Social Sciences (Fourth Ed.). Pearson.
Earlier/later editions are also suitable. While neither of these books is absolutely required, you may wish to purchase one if you would like to have additional explanation, examples and exercises to supplement the coursepack. Of these two, Agresti and Finlay is a bit more advanced. It is also the recommended additional course text for MY452 (which also has a coursepack similar to this one), so you may want to purchase it if you are planning to also take that course.
Other text books: There are hundreds of introductory textbooks on statistics and quantitative methods, many of them covering almost identical material. If you have one which you would like to use, and which looks as if it covers the same material at about the same level as this course, then it is probably suitable as additional reading.
The course materials are all available on Moodle. Go to http://moodle.lse.ac.uk/ and log in using your username and password (the same as for your LSE e-mail). Then in the select courses dialogue box type in MY464, and in the search results click on MY464. The site contains the structure of the course week by week, the readings, the weekly applied exercises and the associated data sets, the coursepack and other materials, as well as a section on news and announcements.
To learn the material from this course you must do the work every week, since it is cumulative; if you miss a week or two there is a chance that you will struggle to catch up. Also bear in mind that most people cannot learn quantitative techniques passively by just watching the lectures and reading the occasional chapter in a textbook. To learn statistics you have to do it; there are no shortcuts. Thus in addition to the lecture there will be a weekly applied exercise (in which you do some data analysis and interpretation using R/RStudio; instructions will be provided). Doing the exercises and discussing them in the weekly class is the best way to make sure you have understood and can apply what was covered in the lectures. If you are having any trouble, this will reveal what the problem is. Thus the course is designed to have multiple, reinforcing ways of helping you learn this material.
There will be a two-hour examination in January in the Lent Term. The exam will be completed online during a three-hour window. Examination papers from previous years are available for revision purposes at the LSE library web site. 2018-19 was the first year that MY464 existed, but the past exams for MY451 provide a good guide to the kinds of questions that we ask. Exams vary from year to year. Some questions closely follow questions that you will have answered in the homeworks or have seen on past exam papers. Others require you to apply the principles you have learned in new ways. Students should understand that past examinations should only be used as rough guides to the types of questions that are likely to appear on the examination.
Students must know their Username and Password in time for the first applied exercise in week 1. This information can be obtained from the IT Help Desk (Library, 1st floor). The software package used for MY464 is R/RStudio, which will be introduced in the first applied exercise in week 1.
We welcome any comments you have on the course. If there are any problems that we can deal with, we will attempt to do so as quickly as possible. Speak to any member of the course team, or to your departmental supervisor if you feel that would be easier for you. Also please let us know if you find any errors or omissions in the coursepack, so that we can correct them.
This coursepack bears many traces of previous materials and all of their authors: Colm O’Muircheartaigh, Colin Mills, Matt Mulford, Fiona Steele, Paul Mitchell, Sally Stares, Jouni Kuha, and Ben Lauderdale. Many thanks to Farimah Daftary, Sue Howard, Jon Jackson, Paul Mitchell, Indraneel Sircar, and many students of previous years for comments and suggestions which are incorporated in the current revision.
| Week | Lecture | Exercise/seminar | Coursepack |
|------|---------|------------------|------------|
| 1 | Course overview and organisation. Introduction to basic concepts | Familiarisation with R/RStudio (no seminar in week 1) | Chapter 1 |
| 2 | Descriptive statistics for categorical variables | Loading data into R/RStudio, descriptive statistics | Sections 2.1–2.4 and 2.8 |
| 3 | Descriptive statistics for continuous variables | Descriptive statistics for categorical variables | Sections 2.5–2.7 |
| 4 | Analysis of two-way contingency tables | Descriptive statistics for continuous variables | Chapters 3 and 4 |
| 5 | Inference for means in two populations | Analysis of two-way contingency tables | Chapters 6 and 7 |
| 6 | Reading Week: no lecture, no exercise/seminar | | |
| 7 | Inference for proportions in one and two populations | Inference for means in two populations | Chapter 5 |
| 8 | Correlation and simple linear regression as descriptive methods | Inference for proportions in one and two populations | Sections 8.1–8.3.4 |
| 9 | Inference for the simple linear regression model, 3-way contingency tables | Correlation and simple linear regression | Section 8.3.5 (Hour 1); Section 8.4 and Chapter 9 (Hour 2) |
| 10 | Multiple linear regression | More on linear regression | Sections 8.5–8.7 |
| 11 | Review and exam preparation | Multiple linear regression | Chapter 10 |
Why do we use R/RStudio? I’ve heard that SAS/STATA/MINITAB/SPSS/LIMDEP is better. At this level it does not matter which program you use, since we are learning standard procedures that are common to all programs. In favour of R/RStudio is that it is free, flexible and extremely powerful.
Can I get a copy of the R/RStudio software to use on my home computer? Yes; this will be explained in the applied exercises and classes in weeks 1 and 2.
I’m taking MY464 because I want to learn how to use R/RStudio but we don’t seem to learn very much about the program. Why is that? MY464 is not a course about learning to use R/RStudio. We use the program merely to facilitate data analysis and interpretation. Some options for learning more about R/RStudio will be mentioned in the first lecture.
I’m taking MY464 to help me analyse data for my dissertation. Can I discuss my data and my specific problems with the lecturers? Yes, but not during the course. Staff of the Department of Methodology will be happy to talk to you about problems specific to your dissertation during the weekly sessions of the Methodology Surgery (see the website of the department for more information).
Does the coursepack contain everything I need to know for the exam? Yes. However, you will stand by far the best chance in the exam if you also attend the lectures, where the lecturers emphasise and explain the key parts of the material.
The lecturer introduced some material that was not in the coursepack. Do I need to know that material? This is almost certainly an illusion. The lectures will not introduce any genuinely new material not included in the coursepack. However, sometimes the lecturer may of course use different words or a different example to further explain some topic. Copies of the most relevant notes displayed at the lectures will be posted on the MY464 Moodle site. All of the material required for the exam is contained in the coursepack, with the posted lecture notes as additional clarification.
Can I work together on the applied exercises with my friends? Yes, we positively encourage you to discuss the exercises with your colleagues. If you do this, please make sure you complete the multiple-choice quiz yourself.
I’m not registered at the LSE but at another University of London college. Can I attend this course? Normally yes, but you will have to complete an intercollegiate enrolment form.
I would like to audit the course without taking the exam. Is that OK? Yes, you are welcome to attend the lectures, providing you are an LSE/University of London student and there is room for you.
MY464 is not challenging enough for me. Is there a more difficult course? Yes, MY452 and numerous other courses offered by the Department of Methodology and the Department of Statistics.