<!-- testing_counts_1.html -->

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<meta name="generator" content="quarto-1.6.1">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">


<title>21&nbsp; Hypothesis-Testing with Counted Data, Part 1 – Resampling statistics</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ 
  vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
  }
pre.numberSource { margin-left: 3em;  padding-left: 4px; }
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
  clear: both;
  margin-bottom: 0em;
}
.hanging-indent div.csl-entry {
  margin-left:2em;
  text-indent:-2em;
}
div.csl-left-margin {
  min-width:2em;
  float:left;
}
div.csl-right-inline {
  margin-left:2em;
  padding-left:1em;
}
div.csl-indent {
  margin-left: 2em;
}</style>


<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./significance.html" rel="next">
<link href="./framing_questions.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
  "location": "sidebar",
  "copy-button": false,
  "collapse-after": 3,
  "panel-placement": "start",
  "type": "textbox",
  "limit": 50,
  "keyboard-shortcut": [
    "f",
    "/",
    "s"
  ],
  "show-item-context": false,
  "language": {
    "search-no-results-text": "No results",
    "search-matching-documents-text": "matching documents",
    "search-copy-link-title": "Copy link to search",
    "search-hide-matches-text": "Hide additional matches",
    "search-more-match-text": "more match in this document",
    "search-more-matches-text": "more matches in this document",
    "search-clear-button-title": "Clear",
    "search-text-placeholder": "",
    "search-detached-cancel-button-title": "Cancel",
    "search-submit-button-title": "Submit",
    "search-label": "Search"
  }
}</script>
<script type="text/javascript">
  $(document).ready(function() {
    $("table").addClass('lightable-paper lightable-striped lightable-hover')
  });
</script>
<script src="site_libs/kePrint-0.0.1/kePrint.js"></script>
<link href="site_libs/lightable-0.0.1/lightable.css" rel="stylesheet">


<link rel="stylesheet" href="style.css">
<link rel="stylesheet" href="font-awesome.min.css">
</head>

<body class="nav-sidebar floating">

<div id="quarto-search-results"></div>
  <header id="quarto-header" class="headroom fixed-top">
  <nav class="quarto-secondary-nav">
    <div class="container-fluid d-flex">
      <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
        <i class="bi bi-layout-text-sidebar-reverse"></i>
      </button>
        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./testing_counts_1.html"><span class="chapter-number">21</span>&nbsp; <span class="chapter-title">Hypothesis-Testing with Counted Data, Part 1</span></a></li></ol></nav>
        <a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
        </a>
      <button type="button" class="btn quarto-search-button" aria-label="Search" onclick="window.quartoOpenSearch();">
        <i class="bi bi-search"></i>
      </button>
    </div>
  </nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
  <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
    <div class="pt-lg-2 mt-2 text-left sidebar-header">
    <div class="sidebar-title mb-0 py-0">
      <a href="./">Resampling statistics</a> 
    </div>
      </div>
        <div class="mt-2 flex-shrink-0 align-items-center">
        <div class="sidebar-search">
        <div id="quarto-search" class="" title="Search"></div>
        </div>
        </div>
    <div class="sidebar-menu-container"> 
    <ul class="list-unstyled mt-1">
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./index.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">Python version</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./preface_third.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">Preface to the third edition</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./preface_second.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">Preface to the second edition</span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./intro.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_method.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">The resampling method</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./what_is_probability.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">What is probability?</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./about_technology.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Introducing Python and the Jupyter notebook</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_with_code.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Resampling with code</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./resampling_with_code2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">More resampling with code</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./sampling_tools.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">7</span>&nbsp; <span class="chapter-title">Tools for samples and sampling</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_1a.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">8</span>&nbsp; <span class="chapter-title">Probability Theory, Part 1</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_1b.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">9</span>&nbsp; <span class="chapter-title">Probability Theory Part I (continued)</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./more_sampling_tools.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">10</span>&nbsp; <span class="chapter-title">Two puzzles and more tools</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_2_compound.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">11</span>&nbsp; <span class="chapter-title">Probability Theory, Part 2: Compound Probability</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_3.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">12</span>&nbsp; <span class="chapter-title">Probability Theory, Part 3</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./probability_theory_4_finite.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">13</span>&nbsp; <span class="chapter-title">Probability Theory, Part 4: Estimating Probabilities from Finite Universes</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./sampling_variability.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">14</span>&nbsp; <span class="chapter-title">On Variability in Sampling</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./monte_carlo.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">15</span>&nbsp; <span class="chapter-title">The Procedures of Monte Carlo Simulation (and Resampling)</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./standard_scores.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">16</span>&nbsp; <span class="chapter-title">Ranks, Quantiles and Standard Scores</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./inference_ideas.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">17</span>&nbsp; <span class="chapter-title">The Basic Ideas in Statistical Inference</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./inference_intro.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">18</span>&nbsp; <span class="chapter-title">Introduction to Statistical Inference</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./point_estimation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">19</span>&nbsp; <span class="chapter-title">Point Estimation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./framing_questions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">20</span>&nbsp; <span class="chapter-title">Framing Statistical Questions</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_counts_1.html" class="sidebar-item-text sidebar-link active">
 <span class="menu-text"><span class="chapter-number">21</span>&nbsp; <span class="chapter-title">Hypothesis-Testing with Counted Data, Part 1</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./significance.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">22</span>&nbsp; <span class="chapter-title">The Concept of Statistical Significance in Testing Hypotheses</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_counts_2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">23</span>&nbsp; <span class="chapter-title">The Statistics of Hypothesis-Testing with Counted Data, Part 2</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_measured.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">24</span>&nbsp; <span class="chapter-title">The Statistics of Hypothesis-Testing With Measured Data</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./testing_procedures.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">25</span>&nbsp; <span class="chapter-title">General Procedures for Testing Hypotheses</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./confidence_1.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">26</span>&nbsp; <span class="chapter-title">Confidence Intervals, Part 1: Assessing the Accuracy of Samples</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./confidence_2.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">27</span>&nbsp; <span class="chapter-title">Confidence Intervals, Part 2: The Two Approaches to Estimating Confidence Intervals</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./reliability_average.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">28</span>&nbsp; <span class="chapter-title">Some Last Words About the Reliability of Sample Averages</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./correlation_causation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">29</span>&nbsp; <span class="chapter-title">Correlation and Causation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./how_big_sample.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">30</span>&nbsp; <span class="chapter-title">How Large a Sample?</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./bayes_simulation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">31</span>&nbsp; <span class="chapter-title">Bayesian Analysis by Simulation</span></span></a>
  </div>
</li>
        <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./references.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text">References</span></a>
  </div>
</li>
        <li class="sidebar-item sidebar-item-section">
      <div class="sidebar-item-container"> 
            <a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
 <span class="menu-text">Appendices</span></a>
          <a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
            <i class="bi bi-chevron-right ms-2"></i>
          </a> 
      </div>
      <ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">  
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">A</span>&nbsp; <span class="chapter-title">Exercise Solutions</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./technical_note.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">B</span>&nbsp; <span class="chapter-title">Technical Note to the Professional Reader</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./acknowlegements.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">C</span>&nbsp; <span class="chapter-title">Acknowledgements</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./code_topics.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">D</span>&nbsp; <span class="chapter-title">Code topics</span></span></a>
  </div>
</li>
          <li class="sidebar-item">
  <div class="sidebar-item-container"> 
  <a href="./errors_suggestions.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">E</span>&nbsp; <span class="chapter-title">Errors and suggestions</span></span></a>
  </div>
</li>
      </ul>
  </li>
    </ul>
    </div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
    <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
        <nav id="TOC" role="doc-toc" class="toc-active">
    <h2 id="toc-title">Table of contents</h2>
   
  <ul>
  <li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction"><span class="header-section-number">21.1</span> Introduction</a>
  <ul class="collapse">
  <li><a href="#sec-building-strings" id="toc-sec-building-strings" class="nav-link" data-scroll-target="#sec-building-strings"><span class="header-section-number">21.1.1</span> Building strings for labels and messages</a></li>
  </ul></li>
  <li><a href="#should-a-single-sample-of-counted-data-be-considered-different-from-a-benchmark-universe" id="toc-should-a-single-sample-of-counted-data-be-considered-different-from-a-benchmark-universe" class="nav-link" data-scroll-target="#should-a-single-sample-of-counted-data-be-considered-different-from-a-benchmark-universe"><span class="header-section-number">21.2</span> Should a single sample of counted data be considered different from a benchmark universe?</a>
  <ul class="collapse">
  <li><a href="#sec-fruitfly" id="toc-sec-fruitfly" class="nav-link" data-scroll-target="#sec-fruitfly"><span class="header-section-number">21.2.1</span> Example: Does irradiation affect the sex ratio in fruit flies?</a></li>
  <li><a href="#sec-female-calves" id="toc-sec-female-calves" class="nav-link" data-scroll-target="#sec-female-calves"><span class="header-section-number">21.2.2</span> Example: Does a treatment increase the female calf rate?</a></li>
  <li><a href="#sec-contract-poll" id="toc-sec-contract-poll" class="nav-link" data-scroll-target="#sec-contract-poll"><span class="header-section-number">21.2.3</span> Example: A public-opinion poll</a></li>
  <li><a href="#example-did-the-trump-clinton-poll-indicate-that-trump-would-win" id="toc-example-did-the-trump-clinton-poll-indicate-that-trump-would-win" class="nav-link" data-scroll-target="#example-did-the-trump-clinton-poll-indicate-that-trump-would-win"><span class="header-section-number">21.2.4</span> Example: Did the Trump-Clinton poll indicate that Trump would win?</a></li>
  <li><a href="#sec-cancer-cures" id="toc-sec-cancer-cures" class="nav-link" data-scroll-target="#sec-cancer-cures"><span class="header-section-number">21.2.5</span> Example: Comparison of possible cancer cure to placebo</a></li>
  <li><a href="#example-did-attitudes-about-marijuana-change" id="toc-example-did-attitudes-about-marijuana-change" class="nav-link" data-scroll-target="#example-did-attitudes-about-marijuana-change"><span class="header-section-number">21.2.6</span> Example: Did attitudes about marijuana change?</a></li>
  <li><a href="#sec-framingham-example" id="toc-sec-framingham-example" class="nav-link" data-scroll-target="#sec-framingham-example"><span class="header-section-number">21.2.7</span> Example: Infarction and cholesterol: Framingham study</a></li>
  <li><a href="#sec-pig-rations" id="toc-sec-pig-rations" class="nav-link" data-scroll-target="#sec-pig-rations"><span class="header-section-number">21.2.8</span> Example: Is one pig ration more effective than the other?</a></li>
  <li><a href="#example-do-planet-densities-differ" id="toc-example-do-planet-densities-differ" class="nav-link" data-scroll-target="#example-do-planet-densities-differ"><span class="header-section-number">21.2.9</span> Example: Do planet densities differ?</a></li>
  </ul></li>
  <li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion"><span class="header-section-number">21.3</span> Conclusion</a></li>
  </ul>
</nav>
    </div>
<!-- main -->
<main class="content" id="quarto-document-content">

<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title"><span id="sec-testing-counts-one" class="quarto-section-identifier"><span class="chapter-number">21</span>&nbsp; <span class="chapter-title">Hypothesis-Testing with Counted Data, Part 1</span></span></h1>
</div>


<div class="quarto-title-meta">

    
  </div>
  

</header>


<section id="introduction" class="level2" data-number="21.1">
<h2 data-number="21.1" class="anchored" data-anchor-id="introduction"><span class="header-section-number">21.1</span> Introduction</h2>
<p>The first task in inferential statistics is to make one or more <em>point estimates</em> — that is, to make one or more statements about <em>how much</em> there is of something we are interested in — including especially the mean and the dispersion. (That work goes under the label “estimation” and is discussed in <a href="point_estimation.html" class="quarto-xref"><span>Chapter 19</span></a>.) Frequently the next step, after making such quantitative estimation of the universe from which a sample has been drawn, is to consider whether two or more samples are different from each other, or whether the single sample is different from a specified value; this work goes under the label “hypothesis testing.” We ask: Did something happen? Or: Is there a difference between two universes? These are yes-no questions.</p>
<p>In other cases, the next step is to inquire into the reliability of the estimates; this goes under the label “confidence intervals.” (Some writers include assessing reliability under the rubric of estimation, but I judge it better not to do so.)</p>
<p>So: Having reviewed how to convert hypothesis-testing problems into statistically testable questions in <a href="framing_questions.html" class="quarto-xref"><span>Chapter 20</span></a>, we now must ask: How does one employ resampling methods to make the statistical test? As is always the case when using resampling techniques, there is no unique series of steps by which to proceed. The crucial criterion in assessing the model is whether it accurately simulates the actual event. With hypothesis-testing problems, any number of models may be correct. Generally speaking, though, the model that makes fullest use of the quantitative information available from the data is the best model.</p>
<p>When attempting to deduce the characteristics of a universe from sample data, or when asking whether a sample was drawn from a particular universe, a crucial issue is whether a “one-tailed test” or a “two-tailed test” should be applied. That is, in examining the results of our resampling experiment based on the benchmark universe, do we examine both ends of the frequency distribution, or just one? If there is strong reason to believe <em>a priori</em> that the difference between the benchmark (null) universe and the sample will be in a given direction — for example, if you hypothesize that the sample mean will be <em>smaller</em> than the mean of the benchmark universe — you should then employ a <em>one-tailed test</em>. If you do <em>not</em> have a strong basis for such a prediction, use the <em>two-tailed</em> test. As an example, when a scientist tests a new medication, his/her hypothesis would be that the number of patients who get well will be higher in the treated group than in the control group. Thus, s/he applies the one-tailed test. See the text below for more detail on one- and two-tailed tests.</p>
<p>Some language first:</p>
<p><strong>Hypothesis:</strong> In inferential statistics, a statement or claim about a universe that can be tested and that you wish to investigate.</p>
<p><strong>Testing:</strong> The process of investigating the validity of a hypothesis.</p>
<p><strong>Benchmark (or null) hypothesis:</strong> A particular hypothesis chosen for convenience when testing hypotheses in inferential statistics. For example, we could test the hypothesis that there is <em>no difference</em> between a sample and a given universe, or between two samples, or that a parameter is less than or greater than a certain value. The benchmark universe refers to this hypothesis. (The concept of the benchmark or null hypothesis was discussed in <a href="probability_theory_1b.html" class="quarto-xref"><span>Chapter 9</span></a> and <a href="framing_questions.html" class="quarto-xref"><span>Chapter 20</span></a>.)</p>
<p>Soon we will begin the actual statistical testing of various sorts of hypotheses about samples and populations.</p>
<p>But, before we get there, we will take a short technical detour.</p>
<section id="sec-building-strings" class="level3" data-number="21.1.1">
<h3 data-number="21.1.1" class="anchored" data-anchor-id="sec-building-strings"><span class="header-section-number">21.1.1</span> Building strings for labels and messages</h3>
<div id="nte-building_strings" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.1: Notebook: Building strings for labels
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/building_strings.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=building_strings.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="building_strings" title="Building strings for labels">

</div>
<p>As the examples in this book proceed, we will use more code techniques to write the examples in a more concise and efficient way.</p>
<p>One task that we often have is to build up helpful strings to use as labels on plots, or to print out as messages. These strings will often mix numbers and text. For example, we may want to print out a helpful message such as: <code>Simulation using 10000 trials</code>, where the number 10000 in the message comes from some variable, such as <code>n_trials</code>. Let’s set that variable now:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>n_trials <span class="op">=</span> <span class="dv">10000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Building the useful string above would involve taking the string <code>'Simulation using '</code>, then appending a string to represent the number 10,000 — as in:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert the number (integer) to a string representing the number.</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="bu">str</span>(n_trials)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>'10000'</code></pre>
</div>
</div>
<p>Finally we need to append another string to the result — <code>' trials.'</code>. So our task will be to <em>concatenate</em> (stick together) these three strings.</p>
<!---
End of R section.
-->
<div class="python">
<p>There are several ways to concatenate strings in Python. For example, Python interprets <code>+</code>, between strings, to mean <em>concatenate</em>. One way to make a new string that concatenates the strings <code>'resampling '</code>, <code>'is '</code> and <code>'better'</code> is to use <code>+</code>, like this:</p>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># + between strings means "concatenate".</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="co">'resampling '</span> <span class="op">+</span> <span class="st">'is '</span> <span class="op">+</span> <span class="st">'better'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>We could always insert a number as a string, by converting the number to a string, and concatenating, like this:</p>
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co">'resampling '</span> <span class="op">+</span> <span class="st">'is '</span> <span class="op">+</span> <span class="bu">str</span>(<span class="dv">100</span>) <span class="op">+</span> <span class="st">' times '</span> <span class="op">+</span> <span class="st">'better'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>However, as you can see, this starts looking rather ugly and verbose. It’s easy to forget to append spaces to the strings to concatenate, and end up with messages like:</p>
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co">'resampling'</span> <span class="op">+</span> <span class="st">'is'</span> <span class="op">+</span> <span class="bu">str</span>(<span class="dv">100</span>) <span class="op">+</span> <span class="st">'times'</span> <span class="op">+</span> <span class="st">'better'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>One of the most flexible ways to solve problems like this is to use Python “f” (Format) strings. Format strings start with an <code>f</code> and can include values inside the string, enclosed in curly brackets. This is best explained by example:</p>
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Our first Python format string.</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Notice the "f" prefix before the quotes, to tell Python this is a Format</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="co"># string, and may include values to interpolate, inside curly brackets.</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="ss">f'resampling is </span><span class="sc">{</span><span class="dv">100</span><span class="sc">}</span><span class="ss"> times better'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The Format string expects Python values to appear inside the string, enclosed in curly brackets. When Python sees a Format (“f”) string like this, it gets the value named or typed inside the curly brackets, converts it to a string, and inserts that string into the result at the relevant place.</p>
<p>Format strings take a little getting used to, but once you are used to them, they are a flexible and concise way of assembling useful messages.</p>
<p>For example, to create the string we started this section with, we could write:</p>
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="ss">f'Simulation using </span><span class="sc">{</span>n_trials<span class="sc">}</span><span class="ss"> trials.'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Don’t forget the <code>f</code> prefix to the string, if you do want to insert values like this, otherwise you’ll get a standard (not-Format) string, and Python won’t insert the value.</p>
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Oops!  We forgot the "f" prefix to the string.  Python won't put in</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="co"># (interpolate) the string representation of the value.</span></span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="co">'Simulation using {n_trials} trials.'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<!---
End of Python section.
-->
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Building strings for labels
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>building_strings</code> starts at <a href="#nte-building_strings" class="quarto-xref">Note&nbsp;<span>21.1</span></a>.</p>
</div>
</div>
<!---
End of building_strings notebook.
-->
<p>Now we begin with statistical testing for hypotheses on samples and populations.</p>
</section>
</section>
<section id="should-a-single-sample-of-counted-data-be-considered-different-from-a-benchmark-universe" class="level2" data-number="21.2">
<h2 data-number="21.2" class="anchored" data-anchor-id="should-a-single-sample-of-counted-data-be-considered-different-from-a-benchmark-universe"><span class="header-section-number">21.2</span> Should a single sample of counted data be considered different from a benchmark universe?</h2>
<section id="sec-fruitfly" class="level3" data-number="21.2.1">
<h3 data-number="21.2.1" class="anchored" data-anchor-id="sec-fruitfly"><span class="header-section-number">21.2.1</span> Example: Does irradiation affect the sex ratio in fruit flies?</h3>
<p><strong>Where the Benchmark Universe Mean (in this case, the Proportion) is Known, is the Mean (Proportion) of the Population Affected by the Treatment?</strong></p>
<p>You think you have developed a technique for irradiating the genes of fruit flies so that the sex ratio of the offspring will <em>not</em> be half males and half females. In the first twenty cases you treat, there are fourteen males and six females. Does this experimental result confirm that the irradiation does work?</p>
<p>First convert the scientific question — whether or not the treatment affects the sex distribution — into a probability-statistical question: Is the observed sample likely to have come from a benchmark universe in which the sex ratio is one male to one female? The benchmark (null) hypothesis, then, is that the treatment makes no difference and the sample comes from the one-male-to-one-female universe. Therefore, we investigate <em>how likely a one-to-one universe is to produce a distribution of fourteen or more of just one sex</em>.</p>
<p>A coin has a one-to-one (one out of two) chance of coming up tails. Therefore, we might flip a coin in groups of twenty flips, and count the number of tails in each twenty flips. Or we can use a random number table. The following steps will produce a sound estimate:</p>
<ul>
<li><strong>Step 1.</strong> Let tails = male, heads = female.</li>
<li><strong>Step 2.</strong> Flip twenty coins and count the number of males. If 14 or more males occur, record “yes.” Also, if 6 or fewer males occur, record “yes” because this means we have gotten 14 or more females. Otherwise, record “no.”</li>
<li><strong>Step 3.</strong> Repeat step 2 perhaps 100 times.</li>
<li><strong>Step 4.</strong> Calculate the proportion “yes” in the 100 trials. This proportion estimates the probability that a fruit-fly population with a propensity to produce 50 percent males will by chance produce as many as 14 or as few as 6 males in a sample of 20 flies.</li>
</ul>
<div class="cell" data-layout-align="center">
<div class="cell-output-display">
<div id="tbl-fruitfly-trials" class="lightable-paper lightable-striped lightable-hover quarto-float quarto-figure quarto-figure-center anchored" data-quarto-postprocess="true" style="font-family: &quot;Arial Narrow&quot;, arial, helvetica, sans-serif; width: auto !important; margin-left: auto; margin-right: auto;">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-fruitfly-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;21.1: Results from 25 random trials for Fruitfly problem
</figcaption>
<div aria-describedby="tbl-fruitfly-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="lightable-paper lightable-striped lightable-hover caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th style="text-align: right;" data-quarto-table-cell-role="th">Trial no</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># of tails</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">&gt;=14 or &lt;=6</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">1</td>
<td style="text-align: right;">12</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">2</td>
<td style="text-align: right;">12</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">3</td>
<td style="text-align: right;">8</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">4</td>
<td style="text-align: right;">11</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">5</td>
<td style="text-align: right;">8</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">6</td>
<td style="text-align: right;">10</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">7</td>
<td style="text-align: right;">11</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">8</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;">Yes</td>
</tr>
<tr class="odd">
<td style="text-align: right;">9</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;">Yes</td>
</tr>
<tr class="even">
<td style="text-align: right;">10</td>
<td style="text-align: right;">10</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">11</td>
<td style="text-align: right;">11</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">12</td>
<td style="text-align: right;">12</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">13</td>
<td style="text-align: right;">7</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">14</td>
<td style="text-align: right;">15</td>
<td style="text-align: left;">Yes</td>
</tr>
<tr class="odd">
<td style="text-align: right;">15</td>
<td style="text-align: right;">13</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">16</td>
<td style="text-align: right;">9</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">17</td>
<td style="text-align: right;">9</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">18</td>
<td style="text-align: right;">10</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">19</td>
<td style="text-align: right;">10</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">20</td>
<td style="text-align: right;">9</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">21</td>
<td style="text-align: right;">12</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="even">
<td style="text-align: right;">22</td>
<td style="text-align: right;">11</td>
<td style="text-align: left;">No</td>
</tr>
<tr class="odd">
<td style="text-align: right;">23</td>
<td style="text-align: right;">4</td>
<td style="text-align: left;">Yes</td>
</tr>
<tr class="even">
<td style="text-align: right;">24</td>
<td style="text-align: right;">16</td>
<td style="text-align: left;">Yes</td>
</tr>
<tr class="odd">
<td style="text-align: right;">25</td>
<td style="text-align: right;">7</td>
<td style="text-align: left;">No</td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>


</div>
</div>
<p><a href="#tbl-fruitfly-trials" class="quarto-xref">Table&nbsp;<span>21.1</span></a> shows the results obtained in 25 trials of twenty flips each. In two of the twenty-five trials (8 percent) there were fourteen or more tails, which we call “males,” and in three of the 25 trials (12 percent) there were six or fewer tails, meaning there were fourteen or more heads (“females”). We can therefore estimate that, even if the treatment does <em>not</em> affect the sex and the births over a long period really are one to one, five out of twenty-five times (20 percent) we would get fourteen or more of one sex or the other. Therefore, finding fourteen males out of 20 births is not overwhelming evidence that the treatment has any effect, even though the result is suggestive.</p>
<p>How accurate is the estimate? Seventy-five more trials were made, and of the 100 trials nine contained fourteen or more “males” (9 percent), and 8 trials contained fourteen or more “females” (8 percent), a total of 17 percent. So the first twenty-five trials gave a fairly reliable indication. As a matter of fact, analytically-based computation (not explained here) shows that the probability of getting fourteen or more females out of twenty births is .057 and, of course, the same for fourteen or more males from a one-to-one universe, implying a total probability of .114 of getting fourteen or more males <em>or</em> females.</p>
<p>Now let us obtain larger and more accurate simulation samples with the computer. The key step in the Python notebook below represents male fruit flies with the string <code>'male'</code> and female fruit flies with the string <code>'female'</code>. The <span class="python"><code>rnd.choice</code></span> function is then used to generate 20 of these strings with an equal probability that either string is selected. This simulates randomly choosing 20 fruit flies on the benchmark assumption — the “null hypothesis” — that each fruit fly has an equal chance of being a male or female. Now we want to discover the chances of getting more than 13 (i.e., 14 or more) males or more than 13 females under these conditions. So we use <span class="python"><code>np.sum</code></span> to count the number of males in each random sample and then store this value in the <code>scores</code> array, which holds this count for each sample. We repeat these steps 10,000 times.</p>
<p>After ten thousand samples have been drawn, we count (<code>sum</code>) how often there were more than 13 males and then count the number of times there were fewer than 7 males (because if there were fewer than 7 males there must have been more than 13 females). When we add the two results together and divide by the number of trials, we have an estimate of the probability that the results obtained from the sample of irradiated fruit flies would be obtained from a random sample of fruit flies.</p>
<div id="nte-fruit_fly" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.2: Notebook: Fruit fly simulation
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/fruit_fly.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=fruit_fly.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="fruit_fly" title="Fruit fly simulation">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="co"># set up the random number generator</span></span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Set the number of trials</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>n_trials <span class="op">=</span> <span class="dv">10000</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a><span class="co"># set the sample size for each trial</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a>sample_size <span class="op">=</span> <span class="dv">20</span></span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a><span class="co"># An empty array to store the trials</span></span>
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a>scores <span class="op">=</span> np.zeros(n_trials)</span>
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Do 10000 trials</span></span>
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n_trials):</span>
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Generate 20 simulated fruit flies, where each has an equal chance of</span></span>
<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a>    <span class="co"># being male or female</span></span>
<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a>    a <span class="op">=</span> rnd.choice([<span class="st">'male'</span>, <span class="st">'female'</span>],</span>
<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a>                   size<span class="op">=</span>sample_size,</span>
<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a>                   p<span class="op">=</span>[<span class="fl">0.5</span>, <span class="fl">0.5</span>],</span>
<span id="cb11-18"><a href="#cb11-18" aria-hidden="true" tabindex="-1"></a>                   replace <span class="op">=</span> <span class="va">True</span>)</span>
<span id="cb11-19"><a href="#cb11-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-20"><a href="#cb11-20" aria-hidden="true" tabindex="-1"></a>    <span class="co"># count the number of males in the sample</span></span>
<span id="cb11-21"><a href="#cb11-21" aria-hidden="true" tabindex="-1"></a>    b <span class="op">=</span> np.<span class="bu">sum</span>(a <span class="op">==</span> <span class="st">'male'</span>)</span>
<span id="cb11-22"><a href="#cb11-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-23"><a href="#cb11-23" aria-hidden="true" tabindex="-1"></a>    <span class="co"># store the result of this trial</span></span>
<span id="cb11-24"><a href="#cb11-24" aria-hidden="true" tabindex="-1"></a>    scores[i] <span class="op">=</span> b</span>
<span id="cb11-25"><a href="#cb11-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-26"><a href="#cb11-26" aria-hidden="true" tabindex="-1"></a><span class="co"># Produce a histogram of the trial results</span></span>
<span id="cb11-27"><a href="#cb11-27" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="ss">f"Number of males in </span><span class="sc">{</span>n_trials<span class="sc">}</span><span class="ss"> samples of </span><span class="ch">\n</span><span class="sc">{</span>sample_size<span class="sc">}</span><span class="ss"> simulated fruit flies"</span>)</span>
<span id="cb11-28"><a href="#cb11-28" aria-hidden="true" tabindex="-1"></a>plt.hist(scores)</span>
<span id="cb11-29"><a href="#cb11-29" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="st">'Number of Males'</span>)</span>
<span id="cb11-30"><a href="#cb11-30" aria-hidden="true" tabindex="-1"></a>plt.ylabel(<span class="st">'Frequency'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
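<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-17-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Histogram of the number of males in each of 10,000 simulated samples of 20 fruit flies, with number of males on the horizontal axis and frequency on the vertical axis"></p>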
</figure>
</div>
</div>
</div>
<p>In the histogram above, we see that in about 12 percent of the trials, the number of males was 14 or more, or 6 or fewer. Or instead of reading the results from the histogram, we can calculate the result by tacking on the following commands to the above program:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Determine the number of trials in which we had 14 or more males.</span></span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>j <span class="op">=</span> np.<span class="bu">sum</span>(scores <span class="op">&gt;=</span> <span class="dv">14</span>)</span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Determine the number of trials in which we had 6 or fewer males.</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(scores <span class="op">&lt;=</span> <span class="dv">6</span>)</span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Add the two results together.</span></span>
<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a>m <span class="op">=</span> j <span class="op">+</span> k</span>
<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert to a proportion.</span></span>
<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a>mm <span class="op">=</span> m <span class="op">/</span> n_trials</span>
<span id="cb12-12"><a href="#cb12-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-13"><a href="#cb12-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Print the results.</span></span>
<span id="cb12-14"><a href="#cb12-14" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(mm)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>0.1191</code></pre>
</div>
</div>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Fruit fly simulation
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>fruit_fly</code> starts at <a href="#nte-fruit_fly" class="quarto-xref">Note&nbsp;<span>21.2</span></a>.</p>
</div>
</div>
<!---
End of notebook
-->
<p>Notice that the strength of the evidence for the effectiveness of the radiation treatment depends upon the original question: whether or not the treatment had <em>any</em> effect on the sex of the fruit fly, which is a two-tailed question. If there were reason to believe at the start that the treatment could increase <em>only</em> the number of <em>males</em>, then we would focus our attention on the result that in only two of the twenty-five trials were there fourteen or more males. There would then be only a 2/25 = 0.08 probability of getting the observed results by chance if the treatment really has no effect, rather than the weaker odds against obtaining fourteen or more of <em>either</em> males or females.</p>
<p>Therefore, whether you decide to figure the odds of just fourteen or more males (what is called a “one-tail test”) or the odds for fourteen or more males <em>plus</em> fourteen or more females (a “two-tail test”), depends upon your advance knowledge of the subject. If you have no reason to believe that the treatment will have an effect <em>only</em> in the direction of creating more males and if you figure the odds for the one-tail test anyway, then you will be kidding yourself. Theory comes to bear here. If you have a strong hypothesis, deduced from a strong theory, that there will be more males, then you should figure one-tail odds, but if you have no such theory you should figure the weaker two-tail odds.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p>
<p>In the case of the next problem concerning calves, we shall see that a one-tail test is appropriate because we have no interest in producing more male calves. Before leaving this example, let us review our intellectual strategy in handling the problem. First we observe a result (14 males in 20 flies) which differs from the proportion of the benchmark population (50 percent males). Because we have treated this sample with irradiation and observed a result that differs from the untreated benchmark-population’s mean, we speculate that the irradiation caused the sample to differ from the untreated population. We wish to check on whether this speculation is correct.</p>
<p>When asking whether this speculation is correct, we are implicitly asking whether future irradiation would also produce a proportion of males higher than 50 percent. That is, we are implicitly asking whether irradiated flies would produce more samples with male proportions as high as 14/20 than would occur by chance in the absence of irradiation.</p>
<p>If samples as far away as 14/20 from the benchmark population mean of 10/20 would occur frequently by chance, then we would not be impressed with that experimental evidence as proof that irradiation does affect the sex ratio. Hence we set up a model that will tell us the frequency with which samples of 14 or more males out of 20 births would be observed by chance. Carrying out the resampling procedure tells us that perhaps a tenth of the time such samples would be observed by chance. That is not extremely frequent, but it is not infrequent either. Hence we would probably conclude that the evidence is provocative enough to justify further experimentation, but not so strong that we should immediately believe in the truth of this speculation.</p>
<p>The logic of attaching meaning to the probabilistic outcome of a test of a hypothesis is discussed in <a href="significance.html" class="quarto-xref"><span>Chapter 22</span></a>. There also is more about the concept of the level of significance in <a href="significance.html" class="quarto-xref"><span>Chapter 22</span></a>.</p>
<p>Because of the great importance of this sort of case, which brings out the basic principles particularly clearly, let us consider another example:</p>
</section>
<section id="sec-female-calves" class="level3" data-number="21.2.2">
<h3 data-number="21.2.2" class="anchored" data-anchor-id="sec-female-calves"><span class="header-section-number">21.2.2</span> Example: Does a treatment increase the female calf rate?</h3>
<p><strong>What is the probability that among 10 calves born, 9 or more will be female?</strong></p>
<p>Let’s consider this question in the context of a set of queries for performing statistical inference that will be discussed further in <a href="testing_procedures.html" class="quarto-xref"><span>Chapter 25</span></a>.</p>
<p><strong><em>The question</em></strong>: (From <span class="citation" data-cites="hodges1970basic">Hodges Jr and Lehmann (<a href="references.html#ref-hodges1970basic" role="doc-biblioref">1970</a>)</span>): Female calves are more valuable than males. A bio-engineer claims to be able to cause more females to be born than the expected 50 percent rate. He conducts his procedure, and nine females are born out of the next 10 pregnancies among the treated cows. Should you believe his claim? That is, what is the probability of a result this (or more) surprising occurring by chance if his procedure has no effect? In this problem, we assume that on average 100 of 206 births are female, in contrast to the 50-50 benchmark universe in the previous problem.</p>
<p><strong><em>What is the purpose of the work?</em></strong>: Female calves are more valuable than male calves.</p>
<p><strong><em>Statistical inference?</em></strong>: Yes.</p>
<p><strong><em>Confidence interval or Test of hypothesis?</em></strong>: Test of hypothesis.</p>
<p><strong><em>Will you state the costs and benefits of various outcomes, or a loss function?</em></strong>: Yes. One need only say that the benefits are very large, and if the results are promising, it is worth gathering more data to confirm results.</p>
<p><strong><em>How many samples of data are part of the hypothesis test?</em></strong>: One.</p>
<p><strong><em>What is the size of the first sample about which you wish to make significance statements?</em></strong>: Ten.</p>
<p><strong><em>What comparison(s) to make?</em></strong>: Compare the sample to the benchmark universe.</p>
<p><strong><em>What is the benchmark universe that embodies the null hypothesis?</em></strong>: 100/206 female.</p>
<p><strong><em>Which symbols for the observed entities?</em></strong>: Balls in bucket, or numbers.</p>
<p><strong><em>What values or ranges of values?</em></strong>: We could write numbers 1 through 206 on pieces of paper, and take numbers 1-100 as “female” and 101-206 as “male”. Or we could use some other mechanism to give us a 100/206 chance of any one calf being female.</p>
<p><strong><em>Finite or infinite universe?</em></strong>: Infinite.</p>
<p><strong><em>Which sample(s) do you wish to compare to which, or to the null universe (and perhaps to the alternative universe)?</em></strong>: Ten calves.</p>
<p><strong><em>What procedure to produce the sample entities?</em></strong>: Sampling with replacement.</p>
<p><strong><em>Simple (single step) or complex (multiple “if” drawings)?</em></strong>: Can think of it either way.</p>
<p><strong><em>What to record as the outcome of each resample trial?</em></strong>: The proportion (or number) of females.</p>
<p><strong><em>What is the criterion to be used in the test?</em></strong>: The probability that in a sample of ten calves, nine (or more) females would be drawn by chance from the benchmark universe of 100/206 females.</p>
<p><strong><em>“One tail” or “two tail” test?</em></strong>: One tail, because the farmer is only interested in females. Finding a large proportion of males would not be of interest; it would not cause rejecting the null hypothesis.</p>
<p>The actual computation of probability may be done in several ways, as discussed earlier for four children and for ten cows. Conventional methods are discussed for comparison in <a href="testing_procedures.html" class="quarto-xref"><span>Chapter 25</span></a>. Here is the resampling solution in Python.</p>
<div id="nte-female_calves" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.3: Notebook: Female calf numbers simulation
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/female_calves.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=female_calves.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="female_calves" title="Female calf numbers simulation">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Set the number of trials.</span></span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a>n_trials <span class="op">=</span> <span class="dv">10000</span></span>
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Set the size of each sample.</span></span>
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a>sample_size <span class="op">=</span> <span class="dv">10</span></span>
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Probability of any one calf being female.</span></span>
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a>p_female <span class="op">=</span> <span class="dv">100</span> <span class="op">/</span> <span class="dv">206</span></span>
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a><span class="co"># An array to store the results.</span></span>
<span id="cb14-16"><a href="#cb14-16" aria-hidden="true" tabindex="-1"></a>scores <span class="op">=</span> np.zeros(n_trials)</span>
<span id="cb14-17"><a href="#cb14-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-18"><a href="#cb14-18" aria-hidden="true" tabindex="-1"></a><span class="co"># For 10000 repeats.</span></span>
<span id="cb14-19"><a href="#cb14-19" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n_trials):</span>
<span id="cb14-20"><a href="#cb14-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-21"><a href="#cb14-21" aria-hidden="true" tabindex="-1"></a>    a <span class="op">=</span> rnd.choice([<span class="st">'female'</span>, <span class="st">'male'</span>],</span>
<span id="cb14-22"><a href="#cb14-22" aria-hidden="true" tabindex="-1"></a>                   p<span class="op">=</span>[p_female, <span class="dv">1</span> <span class="op">-</span> p_female],</span>
<span id="cb14-23"><a href="#cb14-23" aria-hidden="true" tabindex="-1"></a>                   size<span class="op">=</span>sample_size)</span>
<span id="cb14-24"><a href="#cb14-24" aria-hidden="true" tabindex="-1"></a>    b <span class="op">=</span> np.<span class="bu">sum</span>(a <span class="op">==</span> <span class="st">'female'</span>)</span>
<span id="cb14-25"><a href="#cb14-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-26"><a href="#cb14-26" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Store the result of the current trial.</span></span>
<span id="cb14-27"><a href="#cb14-27" aria-hidden="true" tabindex="-1"></a>    scores[i] <span class="op">=</span> b</span>
<span id="cb14-28"><a href="#cb14-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-29"><a href="#cb14-29" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot a histogram of the scores.</span></span>
<span id="cb14-30"><a href="#cb14-30" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="ss">f"Number of females in </span><span class="sc">{</span>n_trials<span class="sc">}</span><span class="ss"> samples of </span><span class="ch">\n</span><span class="sc">{</span>sample_size<span class="sc">}</span><span class="ss"> simulated calves"</span>)</span>
<span id="cb14-31"><a href="#cb14-31" aria-hidden="true" tabindex="-1"></a>plt.hist(scores)</span>
<span id="cb14-32"><a href="#cb14-32" aria-hidden="true" tabindex="-1"></a>plt.xlabel(<span class="st">'Number of Females'</span>)</span>
<span id="cb14-33"><a href="#cb14-33" aria-hidden="true" tabindex="-1"></a>plt.ylabel(<span class="st">'Frequency'</span>)</span>
<span id="cb14-34"><a href="#cb14-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-35"><a href="#cb14-35" aria-hidden="true" tabindex="-1"></a><span class="co"># Count the number of scores that were greater than or equal to 9.</span></span>
<span id="cb14-36"><a href="#cb14-36" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(scores <span class="op">&gt;=</span> <span class="dv">9</span>)</span>
<span id="cb14-37"><a href="#cb14-37" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-38"><a href="#cb14-38" aria-hidden="true" tabindex="-1"></a><span class="co"># Express as a proportion.</span></span>
<span id="cb14-39"><a href="#cb14-39" aria-hidden="true" tabindex="-1"></a>kk <span class="op">=</span> k <span class="op">/</span> n_trials</span>
<span id="cb14-40"><a href="#cb14-40" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-41"><a href="#cb14-41" aria-hidden="true" tabindex="-1"></a><span class="co"># Show the proportion.</span></span>
<span id="cb14-42"><a href="#cb14-42" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">"Probability of 9 or 10 females occurring by chance:"</span>, kk)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Probability of 9 or 10 females occurring by chance: 0.009</code></pre>
</div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-22-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Histogram of the number of females in each simulated sample of 10 calves"></p>
</figure>
</div>
</div>
</div>
<p>We read from the result in variable <code>kk</code> that the probability of 9 or 10 females occurring by chance is a bit less than one percent.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Female calf numbers simulation
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>female_calves</code> starts at <a href="#nte-female_calves" class="quarto-xref">Note&nbsp;<span>21.3</span></a>.</p>
</div>
</div>
<!---
End of calves notebook.
-->
</section>
<section id="sec-contract-poll" class="level3" data-number="21.2.3">
<h3 data-number="21.2.3" class="anchored" data-anchor-id="sec-contract-poll"><span class="header-section-number">21.2.3</span> Example: A public-opinion poll</h3>
<p><strong>Is the Proportion of a Population Greater Than a Given Value?</strong></p>
<p>A municipal official wants to determine whether a majority of the town’s residents are for or against the awarding of a high-speed internet contract, and she asks you to take a poll. You judge that the voter registration records are a fair representation of the universe in which the politician is interested, and you therefore decide to interview a random selection of registered voters. Of a sample of fifty people who expressed opinions, thirty said “yes” they were for the plan and twenty said “no,” they were against it. How conclusively do the results show that the people in town want this internet contract?</p>
<p>Now comes some necessary subtle thinking in the interpretation of what seems like a simple problem. Notice that our aim in the analysis is to avoid the mistake of saying that the town favors the plan when in fact it does not favor the plan. Our chance of making this mistake is greatest when the voters are evenly split, so we choose as the benchmark (null) hypothesis that 50 percent of the town does not want the plan. This statement really means that “50 percent or more do not want the plan.” We could assess the probability of obtaining our result from a population that is split (say) 52-48 against, but such a probability would necessarily be even smaller, and we are primarily interested in assessing the maximum probability of being wrong. If the maximum probability of error turns out to be inconsequential, then we need not worry about less likely errors.</p>
<p>This problem is very much like the one-group fruit fly irradiation problem above. The only difference is that now we are comparing the observed sample against an arbitrary value of 50 percent (because that is the break-point in a situation where the majority decides) whereas in <a href="#sec-fruitfly" class="quarto-xref"><span>Section 21.2.1</span></a> we compared the observed sample against the normal population proportion (also 50 percent, because that is the normal proportion of males). But it really does not matter <em>why</em> we are comparing the observed sample to the figure of 50 percent; the procedure is the same in both cases. (Please notice that there is nothing special about the 50 percent figure; the same procedure would be followed for 20 percent or 85 percent.)</p>
<p>In brief, we a) take two pieces of paper, write “Yes” on one and “No” on the other, put them in a bucket, b) draw a piece of paper from the bucket, record whether it was “Yes” or “No”, replace, and repeat 50 times, c) count the number of “yeses” and “noes” in the fifty draws, d) repeat for perhaps a hundred trials, then e) count the proportion of the trials in which a 50-50 universe would produce thirty or more “yes” answers.</p>
<p>In operational steps, the procedure is as follows:</p>
<ul>
<li><strong>Step 1.</strong> “1-5” = no, “6-0” = yes.</li>
<li><strong>Step 2.</strong> In 50 random numbers, count the “yeses,” and record “false positive” if 30 or more “yeses.”</li>
<li><strong>Step 3.</strong> Repeat step 2 perhaps 100 times.</li>
<li><strong>Step 4.</strong> Calculate the proportion of experimental trials showing “false positive.” This estimates the probability that as many as 30 “yeses” would be observed by chance in a sample of 50 people <em>if</em> half (or more) are really against the plan.</li>
</ul>
<div class="cell" data-layout-align="center">
<div class="cell-output-display">
<div id="tbl-contract-trials" class="lightable-paper lightable-striped lightable-hover quarto-float quarto-figure quarto-figure-center anchored" data-quarto-postprocess="true" style="font-family: &quot;Arial Narrow&quot;, arial, helvetica, sans-serif; width: auto !important; margin-left: auto; margin-right: auto;">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-contract-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;21.2: Results from 20 random trials for contract poll problem
</figcaption>
<div aria-describedby="tbl-contract-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="lightable-paper lightable-striped lightable-hover caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th style="text-align: right;" data-quarto-table-cell-role="th">Trial no</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># of "Noes"</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># of "Yeses"</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">&gt;= 30 "Yeses"</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">1</td>
<td style="text-align: right;">25</td>
<td style="text-align: right;">25</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">2</td>
<td style="text-align: right;">23</td>
<td style="text-align: right;">27</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">3</td>
<td style="text-align: right;">18</td>
<td style="text-align: right;">32</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="even">
<td style="text-align: right;">4</td>
<td style="text-align: right;">33</td>
<td style="text-align: right;">17</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">5</td>
<td style="text-align: right;">32</td>
<td style="text-align: right;">18</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">6</td>
<td style="text-align: right;">23</td>
<td style="text-align: right;">27</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">7</td>
<td style="text-align: right;">28</td>
<td style="text-align: right;">22</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">8</td>
<td style="text-align: right;">28</td>
<td style="text-align: right;">22</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">9</td>
<td style="text-align: right;">22</td>
<td style="text-align: right;">28</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">10</td>
<td style="text-align: right;">30</td>
<td style="text-align: right;">20</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">11</td>
<td style="text-align: right;">22</td>
<td style="text-align: right;">28</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">12</td>
<td style="text-align: right;">28</td>
<td style="text-align: right;">22</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">13</td>
<td style="text-align: right;">26</td>
<td style="text-align: right;">24</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">14</td>
<td style="text-align: right;">20</td>
<td style="text-align: right;">30</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="odd">
<td style="text-align: right;">15</td>
<td style="text-align: right;">27</td>
<td style="text-align: right;">23</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">16</td>
<td style="text-align: right;">19</td>
<td style="text-align: right;">31</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="odd">
<td style="text-align: right;">17</td>
<td style="text-align: right;">26</td>
<td style="text-align: right;">24</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">18</td>
<td style="text-align: right;">27</td>
<td style="text-align: right;">23</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">19</td>
<td style="text-align: right;">20</td>
<td style="text-align: right;">30</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="even">
<td style="text-align: right;">20</td>
<td style="text-align: right;">24</td>
<td style="text-align: right;">26</td>
<td style="text-align: left;"></td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>


</div>
</div>
<p>In <a href="#tbl-contract-trials" class="quarto-xref">Table&nbsp;<span>21.2</span></a>, we see the results of twenty trials; 4 of 20 times (20 percent), 30 or more “yeses” were observed by chance. So our “significance level” or “probability value” is 20 percent, which is normally too high to feel confident that our poll results are reliable. This is the probability that as many as thirty of fifty people would say “yes” by chance if the population were “really” split evenly. (If the population were split so that <em>more</em> than 50 percent were against the plan, the probability would be even <em>less</em> that the observed results would occur by chance. In this sense, the benchmark hypothesis is conservative.) On the other hand, if we had been counting the number of times there were 30 or more “No” votes (which, in our setup, has the same odds as 30 or more “Yes” votes), there would have been three. This indicates how samples can vary just by chance.</p>
<p>Taken together, the evidence suggests that the official would be wise not to place very much confidence in the poll results, but rather ought to act with caution or else take a larger sample of voters.</p>
<div id="nte-contract_poll" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.4: Notebook: Contract poll simulation
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/contract_poll.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=contract_poll.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="contract_poll" title="Contract poll simulation">

</div>
<p>This Python notebook generates samples of 50 simulated voters on the assumption that only 50 percent are in favor of the contract. Then it counts (<code>sum</code>s) the number of samples where over 29 (30 or more) of the 50 respondents said they were in favor of the contract. (That is, we use a “one-tailed test.”) The result in the <code>kk</code> variable is the chance of a “false positive,” that is, 30 or more people saying they favor a contract when support for the proposal is actually split evenly down the middle.</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="co"># We will do 10,000 iterations.</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="python">
<div id="nte-integer-underscores" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.5: Underscores in Python integers
</div>
</div>
<div class="callout-body-container callout-body">
<p>Notice the line above: <code>n = 10_000</code>. This is an alternative way of writing the familiar <code>n = 10000</code>. Python allows underscores among the digits when we type an integer — it will treat them as decoration, and ignore them in resolving the number we intend. This means that all these are equivalent to Python:</p>
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="dv">10000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="dv">10_000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>or even:</p>
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="dv">1_00_00</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>For the same reason, we could also write 1000 as:</p>
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="dv">1_000</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The underscores make no difference to the resulting number in Python; it is up to us whether we use them. They can make it easier for us humans to read the value.</p>
</div>
</div>
<!---
End of callout note.
-->
</div>
<!---
End of Python block.
-->
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Make an array of integers to store the "Yes" counts.</span></span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>yeses <span class="op">=</span> np.zeros(n, dtype<span class="op">=</span><span class="bu">int</span>)</span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a>    answers <span class="op">=</span> rnd.choice([<span class="st">'No'</span>, <span class="st">'Yes'</span>], size<span class="op">=</span><span class="dv">50</span>)</span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a>    yeses[i] <span class="op">=</span> np.<span class="bu">sum</span>(answers <span class="op">==</span> <span class="st">'Yes'</span>)</span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Produce a histogram of the trial results.</span></span>
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Use integer bins for histogram, from 10 through 40.</span></span>
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>plt.hist(yeses, bins<span class="op">=</span><span class="bu">range</span>(<span class="dv">10</span>, <span class="dv">41</span>))</span>
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Number of yes votes out of 50, in null universe'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
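<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-31-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Histogram of the number of yes votes out of 50 in each simulated trial from the null universe"></p>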
</figure>
</div>
</div>
</div>
<p>In the histogram above, we see that about 10 percent of our trials had 30 or more voters in favor, despite the fact that they were drawn from a population that was split 50-50. Python will calculate this proportion directly if we add the following commands to the above:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(yeses <span class="op">&gt;=</span> <span class="dv">30</span>)</span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>kk <span class="op">=</span> k <span class="op">/</span> n</span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Proportion &gt;= 30:'</span>, np.<span class="bu">round</span>(kk, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Proportion &gt;= 30: 0.1</code></pre>
</div>
</div>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Contract poll simulation
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>contract_poll</code> starts at <a href="#nte-contract_poll" class="quarto-xref">Note&nbsp;<span>21.4</span></a>.</p>
</div>
</div>
<!---
End of contract_poll notebook.
-->
<p>The section above discusses testing hypotheses about a single sample of counted data relative to a benchmark universe. This section discusses the issue of whether <em>two</em> samples with counted data should be considered the same or different.</p>
</section>
<section id="example-did-the-trump-clinton-poll-indicate-that-trump-would-win" class="level3" data-number="21.2.4">
<h3 data-number="21.2.4" class="anchored" data-anchor-id="example-did-the-trump-clinton-poll-indicate-that-trump-would-win"><span class="header-section-number">21.2.4</span> Example: Did the Trump-Clinton poll indicate that Trump would win?</h3>
<div id="nte-trump_clinton" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.6: Notebook: Trump/Clinton poll simulation
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/trump_clinton.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=trump_clinton.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="trump_clinton" title="Trump/Clinton poll simulation">

</div>
<p>What is the probability that a sample outcome such as actually observed (840 Trump, 660 Clinton) would occur by chance if Clinton is “really” ahead — that is, if Clinton has 50 percent (or more) of the support? To restate in sharper statistical language: What is the probability that the observed sample or one even more favorable to Trump would occur if the universe has a mean of 50 percent or below?</p>
<p>Here is a procedure that responds to that question:</p>
<ol type="1">
<li>Create a benchmark universe with one ball marked “Trump” and another marked “Clinton”</li>
<li>Draw a ball, record its marking, and replace. (We sample with replacement to simulate the practically-infinite population of U. S. voters.)</li>
<li>Repeat step 2 1500 times and count the number of “Trump”s. If 840 or greater, record “Y”; otherwise, record “N.”</li>
<li>Repeat steps 2 and 3 perhaps 1000 or 10,000 times, and count the number of “Y”s. The outcome estimates the probability that 840 or more Trump choices would occur if the universe is “really” half or more in favor of Clinton.</li>
</ol>
<p>Before we come to the simulation, we need some new code to tune our histograms (see <a href="probability_theory_3.html#sec-on-histograms" class="quarto-xref"><span>Section 12.15.2</span></a>). We are going to set the bins for the histogram using advanced ranges.</p>
<div id="nte-advanced-ranges" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.7: Advanced ranges
</div>
</div>
<div class="callout-body-container callout-body">
<p>So far (<a href="resampling_with_code.html#sec-ranges" class="quarto-xref"><span>Section 5.9</span></a>) we have used <code>np.arange</code> to make regular sequences of integers. For example, to make an array of the sequential integers from 3 through 12, we could use:</p>
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a>np.arange(<span class="dv">3</span>, <span class="dv">13</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Sometimes we want to be able to specify a step size — the gap between the numbers in the sequence. In the sequence above, the gap (step) between each number is 1. We might want some other step size. To create a sequence of integers from 3 through 33 in steps of 5, we could write:</p>
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a>np.arange(<span class="dv">3</span>, <span class="dv">34</span>, step<span class="op">=</span><span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Read this as “give me the sequence (range) of numbers, starting at 3, up to but not including 34, in steps of 5.”</p>
<p>So far we have used integers as the start, stop and step values, but we could also use floating point values. For example, to get a sequence of values starting at 0.1 up to and including 0.9, in steps of 0.2:</p>
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>np.arange(<span class="fl">0.1</span>, <span class="dv">1</span>, step<span class="op">=</span><span class="fl">0.2</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</div>
<!---
End of callout note.
-->
<p>With that background, we can proceed with the Python implementation of the simulation procedure.</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-6"><a href="#cb27-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Number of repeats we will run.</span></span>
<span id="cb27-7"><a href="#cb27-7" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span>
<span id="cb27-8"><a href="#cb27-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-9"><a href="#cb27-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Make an integer array to store the counts.</span></span>
<span id="cb27-10"><a href="#cb27-10" aria-hidden="true" tabindex="-1"></a>trumps <span class="op">=</span> np.zeros(n, dtype<span class="op">=</span><span class="bu">int</span>)</span>
<span id="cb27-11"><a href="#cb27-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-12"><a href="#cb27-12" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb27-13"><a href="#cb27-13" aria-hidden="true" tabindex="-1"></a>    votes <span class="op">=</span> rnd.choice([<span class="st">'Trump'</span>, <span class="st">'Clinton'</span>], size<span class="op">=</span><span class="dv">1500</span>)</span>
<span id="cb27-14"><a href="#cb27-14" aria-hidden="true" tabindex="-1"></a>    trumps[i] <span class="op">=</span> np.<span class="bu">sum</span>(votes <span class="op">==</span> <span class="st">'Trump'</span>)</span>
<span id="cb27-15"><a href="#cb27-15" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-16"><a href="#cb27-16" aria-hidden="true" tabindex="-1"></a><span class="co"># Integer bins from 670 through 830 in steps of 5.</span></span>
<span id="cb27-17"><a href="#cb27-17" aria-hidden="true" tabindex="-1"></a>plt.hist(trumps, bins<span class="op">=</span><span class="bu">range</span>(<span class="dv">670</span>, <span class="dv">831</span>, <span class="dv">5</span>))</span>
<span id="cb27-18"><a href="#cb27-18" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Number of Trump voters of 1500 in null-world simulation'</span>)</span>
<span id="cb27-19"><a href="#cb27-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-20"><a href="#cb27-20" aria-hidden="true" tabindex="-1"></a><span class="co"># How often &gt;= 840 Trump votes in random draw?</span></span>
<span id="cb27-21"><a href="#cb27-21" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(trumps <span class="op">&gt;=</span> <span class="dv">840</span>)</span>
<span id="cb27-22"><a href="#cb27-22" aria-hidden="true" tabindex="-1"></a><span class="co"># As a proportion of simulated resamples.</span></span>
<span id="cb27-23"><a href="#cb27-23" aria-hidden="true" tabindex="-1"></a>kk <span class="op">=</span> k <span class="op">/</span> n</span>
<span id="cb27-24"><a href="#cb27-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb27-25"><a href="#cb27-25" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Proportion voting for Trump:'</span>, kk)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Proportion voting for Trump: 0.0</code></pre>
</div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-42-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%"></p>
</figure>
</div>
</div>
</div>
<p>The value for <code>kk</code> is our estimate of the probability that Trump’s “victory” in the sample would occur by chance if he really were behind. In this case, our probability estimate is less than 1 in 10,000 (&lt; 0.0001).</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Trump/Clinton poll simulation
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>trump_clinton</code> starts at <a href="#nte-trump_clinton" class="quarto-xref">Note&nbsp;<span>21.6</span></a>.</p>
</div>
</div>
<!---
End of Trump / Clinton notebook.
-->
<!---
The results of the various runs may be seen in the histogram and printout
following.

**Bin Center Freq Pct Cum Pct**

  ------- ----- ------ ------
  0.465   4     0.4    0.4
  0.47    14    1.4    1.8
  0.475   28    2.8    4.6
  0.48    46    4.6    9.2
  0.485   97    9.7    18.9
  0.49    106   10.6   29.5
  0.495   159   15.9   45.4
  0.5     121   12.1   57.5
  0.505   157   15.7   73.2
  0.51    102   10.2   83.4
  0.515   75    7.5    90.9
  0.52    39    3.9    94.8
  0.525   35    3.5    98.3
  0.53    11    1.1    99.4
  0.535   5     0.5    99.9
  0.54    1     0.1    100
  ------- ----- ------ ------
-->
</section>
<section id="sec-cancer-cures" class="level3" data-number="21.2.5">
<h3 data-number="21.2.5" class="anchored" data-anchor-id="sec-cancer-cures"><span class="header-section-number">21.2.5</span> Example: Comparison of possible cancer cure to placebo</h3>
<p><strong>Do Two Binomial Populations Differ in Their Proportions</strong>.</p>
<p><a href="#sec-fruitfly" class="quarto-xref"><span>Section 21.2.1</span></a> used an observed sample of male and female fruitflies to test the benchmark (null) hypothesis that the flies came from a universe with a one-to-one sex ratio, and the poll data problem also compared results to a 50-50 hypothesis. The calves problem also compared the results to a single benchmark universe — a proportion of 100/206 females. Now we want to compare <em>two samples with each other</em>, rather than comparing one sample with a hypothesized universe. That is, in this example we are not comparing one sample to a benchmark universe, but rather asking whether <em>both</em> samples come from the <em>same</em> universe. The universe from which both samples come, <em>if</em> both belong to the same universe, may be thought of as the benchmark universe, in this case.</p>
<p>The scientific question is whether pill P cures a rare cancer. A researcher gave pill P to six patients selected randomly from a group of twelve cancer patients; of the six, five got well. He gave an inactive placebo to the other six patients, and two of them got well. Does the evidence justify a conclusion that the pill has a curative effect?</p>
<p>(An identical statistical example would serve for an experiment on methods of teaching reading to children. In such a situation the researcher would respond to inconclusive results by running the experiment on more subjects, but in cases like the cancer-pill example the researcher often cannot obtain more subjects.)</p>
<p>We can answer the stated question by <em>combining</em> the two samples and testing both samples against the resulting combined universe. In this case, the universe is twelve subjects, seven (5 + 2) of whom got well. How likely is it that such a universe would produce two samples as far apart as five of six, and two of six, patients who get well? In other words, how often will two samples of six subjects, each drawn from a universe in which 7/12 of the patients get well, be as far apart as 5 - 2 = 3 patients in favor of the sample designated “pill”? This is obviously a one-tail test, for we have no reason to believe that the pill group might do <em>less</em> well than the placebo group.</p>
<p>We might construct a twelve-sided die, seven of whose sides are marked “get well.” Or put 12 pieces of paper in a bucket, seven with “get well” and five with “not well”. Or we could use pairs of numbers from the random-number table, with numbers “01-07” corresponding to “get well,” numbers “08-12” corresponding to “not get well,” and all other numbers omitted. (If you wish to save time, you can work out a system that uses more numbers and skips fewer, but that is up to you.) Designate the first six subjects “pill” and the next six subjects “placebo.”</p>
<p>The specific procedure might be as follows:</p>
<ul>
<li><strong>Step 1.</strong> Write “get well” on seven pieces of paper, “not well” on another five. Put the 12 pieces of paper into a bucket.</li>
<li><strong>Step 2.</strong> Select two groups, “pill” and “placebo”, each with six random draws (with replacement) from the 12 pieces of paper.</li>
<li><strong>Step 3.</strong> Record how many “get well” in each group.</li>
<li><strong>Step 4.</strong> Subtract the result in group “placebo” from that in group “pill” (the difference may be negative).</li>
<li><strong>Step 5.</strong> Repeat steps 1-4 perhaps 100 times.</li>
<li><strong>Step 6.</strong> Compute the proportion of trials in which the pill does better by three or more cases.</li>
</ul>
<div class="cell" data-layout-align="center">
<div class="cell-output-display">
<div id="tbl-pill-trials" class="lightable-paper lightable-striped lightable-hover quarto-float quarto-figure quarto-figure-center anchored" data-quarto-postprocess="true" style="font-family: &quot;Arial Narrow&quot;, arial, helvetica, sans-serif; width: auto !important; margin-left: auto; margin-right: auto;">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-pill-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;21.3: Results from 25 random trials for pill/placebo
</figcaption>
<div aria-describedby="tbl-pill-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="lightable-paper lightable-striped lightable-hover caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th style="text-align: right;" data-quarto-table-cell-role="th">Trial no</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># of pill cures</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># of placebo cures</th>
<th style="text-align: right;" data-quarto-table-cell-role="th">Difference</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">1</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="even">
<td style="text-align: right;">2</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">1</td>
</tr>
<tr class="odd">
<td style="text-align: right;">3</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">3</td>
</tr>
<tr class="even">
<td style="text-align: right;">4</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td style="text-align: right;">5</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">3</td>
</tr>
<tr class="even">
<td style="text-align: right;">6</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td style="text-align: right;">7</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="even">
<td style="text-align: right;">8</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td style="text-align: right;">9</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="even">
<td style="text-align: right;">10</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="odd">
<td style="text-align: right;">11</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="even">
<td style="text-align: right;">12</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="odd">
<td style="text-align: right;">13</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">-3</td>
</tr>
<tr class="even">
<td style="text-align: right;">14</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">1</td>
</tr>
<tr class="odd">
<td style="text-align: right;">15</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="even">
<td style="text-align: right;">16</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">2</td>
</tr>
<tr class="odd">
<td style="text-align: right;">17</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">4</td>
</tr>
<tr class="even">
<td style="text-align: right;">18</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="odd">
<td style="text-align: right;">19</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">2</td>
</tr>
<tr class="even">
<td style="text-align: right;">20</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">-2</td>
</tr>
<tr class="odd">
<td style="text-align: right;">21</td>
<td style="text-align: right;">2</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">-4</td>
</tr>
<tr class="even">
<td style="text-align: right;">22</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td style="text-align: right;">23</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">-1</td>
</tr>
<tr class="even">
<td style="text-align: right;">24</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">3</td>
<td style="text-align: right;">0</td>
</tr>
<tr class="odd">
<td style="text-align: right;">25</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">-1</td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>


</div>
</div>
<p>In the trials shown in <a href="#tbl-pill-trials" class="quarto-xref">Table&nbsp;<span>21.3</span></a>, in three cases (12 percent) the difference between the randomly-drawn groups is three cases or greater. Apparently it is <em>somewhat</em> unusual — it happens 12 percent of the time — for this universe to generate “pill” samples in which the number of recoveries exceeds the number in the “placebo” samples by three or more. Therefore the answer to the scientific question, based on these samples, is that there is <em>some</em> reason to think that the medicine does have a favorable effect. But the investigator might sensibly await more data before reaching a firm conclusion about the pill’s efficacy, given the 12 percent probability.</p>
<div id="nte-pill_placebo" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.8: Notebook: Cures for pill vs placebo
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/pill_placebo.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=pill_placebo.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="pill_placebo" title="Cures for pill vs placebo">

</div>
<p>Now for a Python solution. Again, the benchmark hypothesis is that pill P has no effect, and we ask how often, on this assumption, the results that were obtained from the actual test of the pill would occur by chance.</p>
<p>Given that in the test 7 of 12 patients overall got well, the benchmark hypothesis assumes 7/12 to be the chances of any random patient being cured. We generate two similar samples of 6 patients, both taken from the same universe composed of the combined samples — the bootstrap procedure. We count (<code>sum</code>) the number who are “get well” in each sample. Then we subtract the number who got well in the “no-pill” sample from the number who got well in the “pill” sample. We record the resulting difference for each trial in the variable <code>pill_betters</code>.</p>
<p>In the actual test, 3 more patients got well in the sample given the pill than in the sample given the placebo. We therefore count how many of the trials yield results where the difference between the sample given the pill and the sample not given the pill was greater than 2 (equal to or greater than 3). This result is the probability that the results derived from the actual test would be obtained from random samples drawn from a population which has a constant cure rate, pill or no pill.</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a><span class="co"># The bucket with the pieces of paper.</span></span>
<span id="cb29-7"><a href="#cb29-7" aria-hidden="true" tabindex="-1"></a>options <span class="op">=</span> np.repeat([<span class="st">'get well'</span>, <span class="st">'not well'</span>], [<span class="dv">7</span>, <span class="dv">5</span>])</span>
<span id="cb29-8"><a href="#cb29-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-9"><a href="#cb29-9" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span>
<span id="cb29-10"><a href="#cb29-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-11"><a href="#cb29-11" aria-hidden="true" tabindex="-1"></a>pill_betters <span class="op">=</span> np.zeros(n, dtype<span class="op">=</span><span class="bu">int</span>)</span>
<span id="cb29-12"><a href="#cb29-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-13"><a href="#cb29-13" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb29-14"><a href="#cb29-14" aria-hidden="true" tabindex="-1"></a>    pill <span class="op">=</span> rnd.choice(options, size<span class="op">=</span><span class="dv">6</span>)</span>
<span id="cb29-15"><a href="#cb29-15" aria-hidden="true" tabindex="-1"></a>    pill_cures <span class="op">=</span> np.<span class="bu">sum</span>(pill <span class="op">==</span> <span class="st">'get well'</span>)</span>
<span id="cb29-16"><a href="#cb29-16" aria-hidden="true" tabindex="-1"></a>    placebo <span class="op">=</span> rnd.choice(options, size<span class="op">=</span><span class="dv">6</span>)</span>
<span id="cb29-17"><a href="#cb29-17" aria-hidden="true" tabindex="-1"></a>    placebo_cures <span class="op">=</span> np.<span class="bu">sum</span>(placebo <span class="op">==</span> <span class="st">'get well'</span>)</span>
<span id="cb29-18"><a href="#cb29-18" aria-hidden="true" tabindex="-1"></a>    pill_betters[i] <span class="op">=</span> pill_cures <span class="op">-</span> placebo_cures</span>
<span id="cb29-19"><a href="#cb29-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb29-20"><a href="#cb29-20" aria-hidden="true" tabindex="-1"></a>plt.hist(pill_betters, bins<span class="op">=</span><span class="bu">range</span>(<span class="op">-</span><span class="dv">6</span>, <span class="dv">7</span>))</span>
<span id="cb29-21"><a href="#cb29-21" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Number of extra cures pill vs placebo in null universe'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-46-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%"></p>
</figure>
</div>
</div>
</div>
<p>Recall our actual observed results: In the medicine group, three more patients were cured than in the placebo group. From the histogram, we see that in only about 8 percent of the simulated trials did the “medicine” group do as well or better. The results seem to suggest — but by no means conclusively — that the medicine’s performance is not due to chance. Further study would probably be warranted. The following commands added to the above program will calculate this proportion directly:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="co"># How many trials gave an advantage of 3 or greater to the pill?</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(pill_betters <span class="op">&gt;=</span> <span class="dv">3</span>)</span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert to a proportion.</span></span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a>kk <span class="op">=</span> k <span class="op">/</span> n</span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Print the result.</span></span>
<span id="cb30-6"><a href="#cb30-6" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Proportion with advantage of 3 or more for pill:'</span>,</span>
<span id="cb30-7"><a href="#cb30-7" aria-hidden="true" tabindex="-1"></a>      np.<span class="bu">round</span>(kk, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Proportion with advantage of 3 or more for pill: 0.07</code></pre>
</div>
</div>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Cures for pill vs placebo
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>pill_placebo</code> starts at <a href="#nte-pill_placebo" class="quarto-xref">Note&nbsp;<span>21.8</span></a>.</p>
</div>
</div>
<!---
End of pill vs placebo notebook.
-->
<p>As I (JLS) wrote when I first proposed this bootstrap method in 1969, this method is not the standard way of handling the problem; it is not even analogous to the standard analytic difference-of-proportions method (though since then it has become widely accepted). Though the method shown is quite direct and satisfactory, there are also <em>many other</em> resampling methods that one might construct to solve the same problem. By all means, invent your own statistics rather than simply trying to copy the methods described here; the examples given here only illustrate the process of inventing statistics rather than offering solutions for all classes of problems.</p>
</section>
<section id="example-did-attitudes-about-marijuana-change" class="level3" data-number="21.2.6">
<h3 data-number="21.2.6" class="anchored" data-anchor-id="example-did-attitudes-about-marijuana-change"><span class="header-section-number">21.2.6</span> Example: Did attitudes about marijuana change?</h3>
<!---
Example 8.18 in W+W 1990.  Looks like fake data.
-->
<p>Consider two polls, each asking 1500 Americans about marijuana legalization. One poll, taken in 1980, found 52 percent of respondents in favor of decriminalization; the other, taken in 1985, found 46 percent in favor of decriminalization <span class="citation" data-cites="wonnacott1990introductory">(<a href="references.html#ref-wonnacott1990introductory" role="doc-biblioref">Wonnacott and Wonnacott 1990, 275</a>)</span>. Our null (benchmark) hypothesis is that both samples came from the same universe (the universe made up of the total of the two sets of observations). If so, let us then ask: how likely is it that two polls would produce results as different as those observed? Hence we construct a universe with a mean of 49 percent (the mean of the two polls of 52 percent and 46 percent), and repeatedly draw pairs of samples of size 1500 from it.</p>
<p>To see how the construction of the appropriate question is much more challenging intellectually than is the actual mathematics, let us consider another possibility suggested by a student: What about considering the universe to be the earlier poll with a mean of 52 percent, and then asking the probability that the later poll of 1500 people with a mean of 46 percent would come from it? Indeed, on first thought that procedure seems reasonable.</p>
<p>Upon reflection — and it takes considerable thought on these matters to get them right — that would <em>not</em> be an appropriate procedure. The student’s suggested procedure would be the same as assuming that we had long-run solid knowledge of the universe, as if based on millions of observations, and then asking about the probability of a particular sample drawn from it. That does not correspond to the facts.</p>
<p>The only way to find the approach you eventually consider best — and there is no guarantee that it is indeed <em>correct</em> — is by close reference to the particular facts of the case.</p>
</section>
<section id="sec-framingham-example" class="level3" data-number="21.2.7">
<h3 data-number="21.2.7" class="anchored" data-anchor-id="sec-framingham-example"><span class="header-section-number">21.2.7</span> Example: Infarction and cholesterol: Framingham study</h3>
<p>It is so important to understand the logic of hypothesis tests, and of the resampling method of doing them, that we will now tackle another problem similar to the preceding one.</p>
<p>This will be the first of several problems that use data from the famous Framingham study (drawn from Kahn and Sempos <span class="citation" data-cites="kahn1989statistical">(<a href="references.html#ref-kahn1989statistical" role="doc-biblioref">1989</a>)</span>) concerning the development of myocardial infarction 16 years after the Framingham study began, for men ages 35-44 with serum cholesterol above 250, compared to those with serum cholesterol below 250. The raw data are shown in <a href="#tbl-framingham-data" class="quarto-xref">Table&nbsp;<span>21.4</span></a>. The data are from <span class="citation" data-cites="shurtleff1970framingham">(<a href="references.html#ref-shurtleff1970framingham" role="doc-biblioref">Shurtleff 1970</a>)</span>, cited in <span class="citation" data-cites="kahn1989statistical">(<a href="references.html#ref-kahn1989statistical" role="doc-biblioref">Kahn and Sempos 1989, 12:61</a>, Table 3-8)</span>. Kahn and Sempos divided the cases into “high” and “low” cholesterol.</p>
<div id="tbl-framingham-data" class="quarto-float quarto-figure quarto-figure-center anchored">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-framingham-data-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;21.4: Development of Myocardial Infarction in Men Aged 35-44 After 16 Years
</figcaption>
<div aria-describedby="tbl-framingham-data-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="caption-top table">
<thead>
<tr class="header">
<th>Serum Cholesterol</th>
<th>Developed MI</th>
<th>Didn’t Develop MI</th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>&gt; 250</td>
<td>10</td>
<td>125</td>
<td>135</td>
</tr>
<tr class="even">
<td>&lt;= 250</td>
<td>21</td>
<td>449</td>
<td>470</td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>
<p>The statistical logic properly begins by asking: How likely is it that the two observed groups “really” came from the same “population” with respect to infarction rates? That is, we start with this question: How sure should one be that there is a difference in myocardial infarction rates between the high and low-cholesterol groups? Operationally, we address this issue by asking how likely it is that two groups as different in disease rates as the observed groups would be produced by the same “statistical universe.”</p>
<p>Key step: We assume that the relevant “benchmark” or “null hypothesis” population (universe) is the composite of the two observed groups. That is, if there were no “true” difference in infarction rates between the two serum-cholesterol groups, and the observed disease differences occurred just because of sampling variation, the most reasonable representation of the population from which they came is the composite of the two observed groups.</p>
<p>Therefore, we compose a hypothetical “benchmark” universe containing (135 + 470 =) 605 men at risk, and designate (10 + 21 =) 31 of them as infarction cases. We want to determine how likely it is that a universe like this one would produce — just by chance — two groups that differ as much as do the actually observed groups. That is, how often would random sampling from this universe produce one sub-sample of 135 men containing a large enough number of infarctions, and the other sub-sample of 470 men producing few enough infarctions, that the difference in occurrence rates would be as high as the observed difference of .029? (10/135 = .074, and 21/470 = .045, and .074 - .045 = .029).</p>
<p>So far, everything that has been said applies both to the conventional formulaic method and to the “new statistics” resampling method. But the logic is seldom explained to the reader of a piece of research — if indeed the researcher her/himself grasps what the formula is doing. And if one just grabs for a formula with a prayer that it is the right one, one need never analyze the statistical logic of the problem at hand.</p>
<p>Now we tackle this problem with a method that you would think of yourself if you began with the following mind-set: How can I simulate the mechanism whose operation I wish to understand? These steps will do the job:</p>
<ul>
<li><strong>Step 1:</strong> Fill a bucket with 605 balls, 31 red (infarction) and the rest (605 - 31 = 574) green (no infarction).</li>
<li><strong>Step 2:</strong> Draw a sample of 135 (simulating the high serum-cholesterol group), one ball at a time and throwing it back after it is drawn to keep the simulated probability of an infarction the same throughout the sample; record the number of reds. Then do the same with another sample of 470 (the low serum-cholesterol group).</li>
<li><strong>Step 3:</strong> Calculate the difference in infarction rates for the two simulated groups, and compare it to the actual difference of .029; if the simulated difference is that large, record “Yes” for this trial; if not, record “No.”</li>
<li><strong>Step 4:</strong> Repeat steps 2 and 3 until a total of (say) 400 or 1000 trials have been completed. Compute the frequency with which the simulated groups produce a difference as great as actually observed. This frequency is an estimate of the probability that a difference as great as actually observed in Framingham would occur even if serum cholesterol has no effect upon myocardial infarction.</li>
</ul>
<p>The procedure above can be carried out with balls in a bucket in a few hours. Yet it is natural to seek the added convenience of the computer to draw the samples. Here is the Python program:</p>
<div id="nte-framingham_hearts" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.9: Notebook: Framingham heart data
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/framingham_hearts.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=framingham_hearts.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="framingham_hearts" title="Framingham heart data">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-6"><a href="#cb32-6" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span>
<span id="cb32-7"><a href="#cb32-7" aria-hidden="true" tabindex="-1"></a>men <span class="op">=</span> np.repeat([<span class="st">'infarction'</span>, <span class="st">'no infarction'</span>], [<span class="dv">31</span>, <span class="dv">574</span>])</span>
<span id="cb32-8"><a href="#cb32-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-9"><a href="#cb32-9" aria-hidden="true" tabindex="-1"></a>n_high <span class="op">=</span> <span class="dv">135</span>  <span class="co"># Number of men with high cholesterol</span></span>
<span id="cb32-10"><a href="#cb32-10" aria-hidden="true" tabindex="-1"></a>n_low <span class="op">=</span> <span class="dv">470</span>  <span class="co"># Number of men with low cholesterol</span></span>
<span id="cb32-11"><a href="#cb32-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-12"><a href="#cb32-12" aria-hidden="true" tabindex="-1"></a>infarct_differences <span class="op">=</span> np.zeros(n)</span>
<span id="cb32-13"><a href="#cb32-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-14"><a href="#cb32-14" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb32-15"><a href="#cb32-15" aria-hidden="true" tabindex="-1"></a>    highs <span class="op">=</span> rnd.choice(men, size<span class="op">=</span>n_high)</span>
<span id="cb32-16"><a href="#cb32-16" aria-hidden="true" tabindex="-1"></a>    lows <span class="op">=</span> rnd.choice(men, size<span class="op">=</span>n_low)</span>
<span id="cb32-17"><a href="#cb32-17" aria-hidden="true" tabindex="-1"></a>    high_infarcts <span class="op">=</span> np.<span class="bu">sum</span>(highs <span class="op">==</span> <span class="st">'infarction'</span>)</span>
<span id="cb32-18"><a href="#cb32-18" aria-hidden="true" tabindex="-1"></a>    low_infarcts <span class="op">=</span> np.<span class="bu">sum</span>(lows <span class="op">==</span> <span class="st">'infarction'</span>)</span>
<span id="cb32-19"><a href="#cb32-19" aria-hidden="true" tabindex="-1"></a>    high_prop <span class="op">=</span> high_infarcts <span class="op">/</span> n_high</span>
<span id="cb32-20"><a href="#cb32-20" aria-hidden="true" tabindex="-1"></a>    low_prop <span class="op">=</span> low_infarcts <span class="op">/</span> n_low</span>
<span id="cb32-21"><a href="#cb32-21" aria-hidden="true" tabindex="-1"></a>    infarct_differences[i] <span class="op">=</span> high_prop <span class="op">-</span> low_prop</span>
<span id="cb32-22"><a href="#cb32-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-23"><a href="#cb32-23" aria-hidden="true" tabindex="-1"></a><span class="co"># Set the histogram bin edges to the sequence starting at -0.1, up to (not</span></span>
<span id="cb32-24"><a href="#cb32-24" aria-hidden="true" tabindex="-1"></a><span class="co"># including) 0.1, in steps of 0.005.</span></span>
<span id="cb32-25"><a href="#cb32-25" aria-hidden="true" tabindex="-1"></a>plt.hist(infarct_differences, bins<span class="op">=</span>np.arange(<span class="op">-</span><span class="fl">0.1</span>, <span class="fl">0.1</span>, <span class="fl">0.005</span>))</span>
<span id="cb32-26"><a href="#cb32-26" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Infarct proportion differences in null universe'</span>)</span>
<span id="cb32-27"><a href="#cb32-27" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-28"><a href="#cb32-28" aria-hidden="true" tabindex="-1"></a><span class="co"># How often was the resampled difference &gt;= the observed difference?</span></span>
<span id="cb32-29"><a href="#cb32-29" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(infarct_differences <span class="op">&gt;=</span> <span class="fl">0.029</span>)</span>
<span id="cb32-30"><a href="#cb32-30" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert this result to a proportion</span></span>
<span id="cb32-31"><a href="#cb32-31" aria-hidden="true" tabindex="-1"></a>kk <span class="op">=</span> k <span class="op">/</span> n</span>
<span id="cb32-32"><a href="#cb32-32" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb32-33"><a href="#cb32-33" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Proportion of trials with difference &gt;= observed:'</span>,</span>
<span id="cb32-34"><a href="#cb32-34" aria-hidden="true" tabindex="-1"></a>      np.<span class="bu">round</span>(kk, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Proportion of trials with difference &gt;= observed: 0.09</code></pre>
</div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
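<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-49-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Histogram of infarct proportion differences between simulated high- and low-cholesterol groups in the null universe"></p>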
</figure>
</div>
</div>
</div>
<p>The results of the test using this program may be seen in the histogram. We find — perhaps surprisingly — that a difference as large as observed would occur by chance around 10 percent of the time. (If we were not guided by the theoretical expectation that high serum cholesterol produces heart disease, we might include the 10 percent difference going in the other direction, giving a 20 percent chance). Even a ten percent chance is sufficient to call into question the conclusion that high serum cholesterol is dangerous. At a minimum, this statistical result should call for more research before taking any strong action clinically or otherwise.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Framingham heart data
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>framingham_hearts</code> starts at <a href="#nte-framingham_hearts" class="quarto-xref">Note&nbsp;<span>21.9</span></a>.</p>
</div>
</div>
<!---
End of Framingham notebook.
-->
<p>Where should one look to determine which procedures should be used to deal with a problem such as set forth above? Unlike the formulaic approach, the basic source is not a manual which sets forth a menu of formulas together with sets of rules about when they are appropriate. Rather, you consult your own understanding about what is happening in (say) the Framingham situation, and the question that needs to be answered, and then you construct a “model” that is as faithful to the facts as is possible. The bucket-sampling described above is such a model for the case at hand.</p>
<p>To connect up what we have done with the conventional approach, one could apply a z test (conceptually similar to the t test, but applicable to yes-no data; it is the Normal-distribution approximation to the large binomial distribution). Doing so, we find that the results are much the same as the resampling result — an eleven percent probability.</p>
<p>Someone may ask: Why do a resampling test when you can use a standard device such as a z or t test? The great advantage of resampling is that it avoids using the wrong method. The researcher is more likely to arrive at sound conclusions with resampling because s/he can understand what s/he is doing, instead of blindly grabbing a formula which may be in error.</p>
<p>The textbook from which the problem is drawn is an excellent one; the difficulty of its presentation is an inescapable consequence of the formulaic approach to probability and statistics. The body of complex algebra and tables that only a rare expert understands down to the foundations constitutes an impenetrable wall to understanding. Yet without such understanding, there can be only rote practice, which leads to frustration and error.</p>
</section>
<section id="sec-pig-rations" class="level3" data-number="21.2.8">
<h3 data-number="21.2.8" class="anchored" data-anchor-id="sec-pig-rations"><span class="header-section-number">21.2.8</span> Example: Is one pig ration more effective than the other?</h3>
<p><strong>Testing For a Difference in Means With a Two-by-Two Classification</strong>.</p>
<p>Each of two new types of ration is fed to twelve pigs. A farmer wants to know whether ration A or ration B is better.<a href="#fn2" class="footnote-ref" id="fnref2" role="doc-noteref"><sup>2</sup></a> The weight gains in pounds for pigs fed on rations A and B are:</p>
<p>A: 31, 34, 29, 26, 32, 35, 38, 34, 31, 29, 32, 31</p>
<p>B: 26, 24, 28, 29, 30, 29, 31, 29, 32, 26, 28, 32</p>
<p>The statistical question may be framed as follows: should one consider that the pigs fed on the different rations come from the same universe with respect to weight gains?</p>
<p>In the actual experiment, 9 of the 12 pigs who were fed ration A also were in the top half of weight gains. How likely is it that one group of 12 randomly-chosen pigs would contain 9 of the 12 top weight gainers?</p>
<p>One approach to the problem is to divide the pigs into two groups — the twelve with the highest weight gains, and the twelve with the lowest weight gains — and examine whether an unusually large number of high-weight-gain pigs were fed on one or the other of the rations.</p>
<p>We can make this test by ordering and grouping the twenty four pigs:</p>
<ul>
<li><strong>High-weight group:</strong> 38 (ration A), 35 (A), 34 (A), 34 (A), 32 (B), 32 (A), 32 (A), 32 (B), 31 (A), 31 (B), 31 (A), 31 (A)</li>
<li><strong>Low-weight group:</strong> 30 (B), 29 (A), 29 (A), 29 (B), 29 (B), 29 (B), 28 (B), 28 (B), 26 (A), 26 (B), 26 (B), 24 (B).</li>
</ul>
<p>Among the twelve high-weight-gain pigs, nine were fed on ration A. We ask: Is this further from an even split than we are likely to get by chance? Let us take twelve red and twelve black cards, shuffle them, and deal out twelve cards (the other twelve need not be dealt out). Count the proportion of the hands in which one ration comes up nine or more times in the first twelve cards, to reflect ration A’s appearance nine times among the highest twelve weight gains. More specifically:</p>
<ul>
<li><strong>Step 1.</strong> Constitute a deck of twelve red and twelve black cards, and shuffle.</li>
<li><strong>Step 2.</strong> Deal out twelve cards, count the number red, and record “yes” if there are nine or more of <em>either</em> red or black.</li>
<li><strong>Step 3.</strong> Repeat step 2 perhaps fifty times.</li>
<li><strong>Step 4.</strong> Compute the proportion “yes.” This proportion estimates the probability sought.</li>
</ul>
<div class="cell" data-layout-align="center">
<div class="cell-output-display">
<div id="tbl-pig-trials" class="lightable-paper lightable-striped lightable-hover quarto-float quarto-figure quarto-figure-center anchored" data-quarto-postprocess="true" style="font-family: &quot;Arial Narrow&quot;, arial, helvetica, sans-serif; width: auto !important; margin-left: auto; margin-right: auto;">
<figure class="quarto-float quarto-float-tbl figure">
<figcaption class="quarto-float-caption-top quarto-float-caption quarto-float-tbl" id="tbl-pig-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
Table&nbsp;21.5: Results from 25 random trials for pig rations
</figcaption>
<div aria-describedby="tbl-pig-trials-caption-0ceaefa1-69ba-4598-a22c-09a6ac19f8ca">
<table class="lightable-paper lightable-striped lightable-hover caption-top table table-sm table-striped small" data-quarto-postprocess="true">
<thead>
<tr class="header">
<th style="text-align: right;" data-quarto-table-cell-role="th">Trial no</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># red</th>
<th style="text-align: right;" data-quarto-table-cell-role="th"># black</th>
<th style="text-align: left;" data-quarto-table-cell-role="th">&gt;=9 red or black</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">1</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">2</td>
<td style="text-align: right;">9</td>
<td style="text-align: right;">3</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="odd">
<td style="text-align: right;">3</td>
<td style="text-align: right;">9</td>
<td style="text-align: right;">3</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="even">
<td style="text-align: right;">4</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">5</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">6</td>
<td style="text-align: right;">8</td>
<td style="text-align: right;">4</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">7</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">8</td>
<td style="text-align: right;">8</td>
<td style="text-align: right;">4</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">9</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">10</td>
<td style="text-align: right;">10</td>
<td style="text-align: right;">2</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="odd">
<td style="text-align: right;">11</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">12</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">13</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">14</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">7</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">15</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">16</td>
<td style="text-align: right;">5</td>
<td style="text-align: right;">7</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">17</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">18</td>
<td style="text-align: right;">4</td>
<td style="text-align: right;">8</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">19</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">20</td>
<td style="text-align: right;">9</td>
<td style="text-align: right;">3</td>
<td style="text-align: left;">+</td>
</tr>
<tr class="odd">
<td style="text-align: right;">21</td>
<td style="text-align: right;">6</td>
<td style="text-align: right;">6</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">22</td>
<td style="text-align: right;">8</td>
<td style="text-align: right;">4</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">23</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="even">
<td style="text-align: right;">24</td>
<td style="text-align: right;">7</td>
<td style="text-align: right;">5</td>
<td style="text-align: left;"></td>
</tr>
<tr class="odd">
<td style="text-align: right;">25</td>
<td style="text-align: right;">9</td>
<td style="text-align: right;">3</td>
<td style="text-align: left;">+</td>
</tr>
</tbody>
</table>
</div>
</figure>
</div>


</div>
</div>
<p><a href="#tbl-pig-trials" class="quarto-xref">Table&nbsp;<span>21.5</span></a> shows the results of 25 trials. In five (marked by + signs) of the 25 (that is, 20 percent of the trials) there were nine or more either red or black cards in the first twelve cards. Again the results suggest that it would be <em>slightly unusual</em> for the results to favor one ration or the other so strongly just by chance if they come from the same universe.</p>
<p>Now the Python procedure to answer the question:</p>
<div id="nte-pig_rations" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.10: Notebook: Weight gain on pig rations
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/pig_rations.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=pig_rations.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="pig_rations" title="Weight gain on pig rations">

</div>
<p>The <code>ranks = np.arange(1, 25)</code> statement creates an array of numbers 1 through 24, which will represent the rankings of weight gains for each of the 24 pigs. We repeat the following procedure for 10000 trials. First we shuffle the elements of array <code>ranks</code> so that the rank numbers for weight gains are randomized and placed in array <code>shuffled</code>. We then select the first 12 elements of <code>shuffled</code> and place them in <code>first_12</code>; this represents the rankings of a randomly-selected group of 12 pigs. We next count (<code>sum</code>) in <code>n_top</code> the number of pigs whose rankings for weight gain were in the top half — that is, a rank of less than 13. We record that number in <code>top_ranks</code>, and then continue the loop, until we finish our <code>n</code> trials.</p>
<p>Since we did not know beforehand the direction of the effect of ration A on weight gain, we want to count the times that <em>either more than 8</em> of the random selection of 12 pigs were in the top half of the rankings, <em>or that fewer than 4</em> of these pigs were in the top half of the weight gain rankings — (The latter is the same as counting the number of times that more than 8 of the 12 <em>non-selected</em> random pigs were in the top half in weight gain.)</p>
<p>We do so with the final two <code>sum</code> statements. By adding the two results <code>n_gte_9</code> and <code>n_lte_3</code> together, we have the number of times out of 10,000 that differences in weight gains in two groups as dramatic as those obtained in the actual experiment would occur by chance.</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a><span class="co"># Constitute the set of the weight gain rank orders. ranks is now a vector</span></span>
<span id="cb34-7"><a href="#cb34-7" aria-hidden="true" tabindex="-1"></a><span class="co"># consisting of the numbers 1 — 24, in that order.</span></span>
<span id="cb34-8"><a href="#cb34-8" aria-hidden="true" tabindex="-1"></a>ranks <span class="op">=</span> np.arange(<span class="dv">1</span>, <span class="dv">25</span>)</span>
<span id="cb34-9"><a href="#cb34-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-10"><a href="#cb34-10" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span>
<span id="cb34-11"><a href="#cb34-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-12"><a href="#cb34-12" aria-hidden="true" tabindex="-1"></a>top_ranks <span class="op">=</span> np.zeros(n, dtype<span class="op">=</span><span class="bu">int</span>)</span>
<span id="cb34-13"><a href="#cb34-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-14"><a href="#cb34-14" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb34-15"><a href="#cb34-15" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Shuffle the ranks of the weight gains.</span></span>
<span id="cb34-16"><a href="#cb34-16" aria-hidden="true" tabindex="-1"></a>    shuffled <span class="op">=</span> rnd.permuted(ranks)</span>
<span id="cb34-17"><a href="#cb34-17" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Take the first 12 ranks.</span></span>
<span id="cb34-18"><a href="#cb34-18" aria-hidden="true" tabindex="-1"></a>    first_12 <span class="op">=</span> shuffled[:<span class="dv">12</span>]</span>
<span id="cb34-19"><a href="#cb34-19" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Determine how many of these randomly selected 12 ranks are less than</span></span>
<span id="cb34-20"><a href="#cb34-20" aria-hidden="true" tabindex="-1"></a>    <span class="co"># 13 (i.e. 1-12), put that result in n_top.</span></span>
<span id="cb34-21"><a href="#cb34-21" aria-hidden="true" tabindex="-1"></a>    n_top <span class="op">=</span> np.<span class="bu">sum</span>(first_12 <span class="op">&lt;=</span> <span class="dv">12</span>)</span>
<span id="cb34-22"><a href="#cb34-22" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Keep track of each trial result in top_ranks</span></span>
<span id="cb34-23"><a href="#cb34-23" aria-hidden="true" tabindex="-1"></a>    top_ranks[i] <span class="op">=</span> n_top</span>
<span id="cb34-24"><a href="#cb34-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb34-25"><a href="#cb34-25" aria-hidden="true" tabindex="-1"></a>plt.hist(top_ranks, bins<span class="op">=</span>np.arange(<span class="dv">1</span>, <span class="dv">12</span>))</span>
<span id="cb34-26"><a href="#cb34-26" aria-hidden="true" tabindex="-1"></a>plt.title(<span class="st">'Number of top 12 ranks in pig-ration trials'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="testing_counts_1_files/figure-html/unnamed-chunk-53-1.png" class="img-fluid quarto-figure quarto-figure-center figure-img" style="width:70.0%" alt="Histogram of the number of top 12 ranks among the 12 selected pigs in each pig-ration trial"></p>
</figure>
</div>
</div>
</div>
<p>We see from the histogram that, in about 4 percent of the trials, either more than 8 or fewer than 4 top half ranks (1-12) made it into the random group of twelve that we selected. Python will calculate this for us as follows:</p>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Determine how many of the trials yielded 9 or more top ranks.</span></span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>n_gte_9 <span class="op">=</span> np.<span class="bu">sum</span>(top_ranks <span class="op">&gt;=</span> <span class="dv">9</span>)</span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Determine how many trials yielded 3 or fewer of the top ranks.</span></span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a><span class="co"># If there were 3 or fewer, then 9 or more of the top ranks must</span></span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a><span class="co"># have been in the other group (not selected).</span></span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a>n_lte_3 <span class="op">=</span> np.<span class="bu">sum</span>(top_ranks <span class="op">&lt;=</span> <span class="dv">3</span>)</span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Add the two together.</span></span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a>n_both <span class="op">=</span> n_gte_9 <span class="op">+</span> n_lte_3</span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert to a proportion.</span></span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a>prop_both <span class="op">=</span> n_both <span class="op">/</span> n</span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Trial proportion &gt;=9 top ranks in either group:'</span>,</span>
<span id="cb35-13"><a href="#cb35-13" aria-hidden="true" tabindex="-1"></a>      np.<span class="bu">round</span>(prop_both, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Trial proportion &gt;=9 top ranks in either group: 0.04</code></pre>
</div>
</div>
<p>The decisions that are warranted on the basis of the results depend upon one’s purpose. If writing a scientific paper on the merits of ration A is the ultimate purpose, it would be sensible to test another batch of pigs to get further evidence. (Or you could proceed to employ another sort of test for a slightly more precise evaluation.) But if the goal is a decision on which type of ration to buy for a small farm and they are the same price, just go ahead and buy ration A because, even if it is no better than ration B, you have strong evidence that it is <em>no worse</em>.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Weight gain on pig rations
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>pig_rations</code> starts at <a href="#nte-pig_rations" class="quarto-xref">Note&nbsp;<span>21.10</span></a>.</p>
</div>
</div>
<!---
End of pig-ration notebook.
-->
</section>
<section id="example-do-planet-densities-differ" class="level3" data-number="21.2.9">
<h3 data-number="21.2.9" class="anchored" data-anchor-id="example-do-planet-densities-differ"><span class="header-section-number">21.2.9</span> Example: Do planet densities differ?</h3>
<p><strong>Consider the five planets known to the ancient world.</strong></p>
<p>Mosteller and Rourke <span class="citation" data-cites="mosteller1973sturdy">(<a href="references.html#ref-mosteller1973sturdy" role="doc-biblioref">1973, 17–19</a>)</span> ask us to compare the densities of the three planets <em>farther</em> from the sun than is the earth (Mars, density 0.71; Jupiter, 0.24; and Saturn, 0.12) against the densities of the planets <em>closer</em> to the sun than is the earth (Mercury, 0.68; Venus, 0.94).</p>
<p>The average density of the distant planets is .357, of the closer planets is .81. Is this difference (.453) statistically surprising, or is it likely to occur in a chance ordering of these planets?</p>
<p>We can answer this question with a permutation test; such sampling without replacement makes sense here because we are considering the entire set of planets, rather than a sample drawn from a larger population of planets (the word “population” is used here, rather than “universe,” to avoid confusion). And because the number of objects is so small, one could examine all possible arrangements (permutations), and see how many have (say) differences in mean densities between the two groups as large as observed.</p>
<p>Another method that Mosteller and Rourke suggest is by a comparison of the density <em>ranks</em> of the two sets, where Saturn has rank 1 and Venus has rank 5. This might have a scientific advantage if the sample data are dominated by a single “outlier,” whose domination is removed when we rank the data.</p>
<p>We see that the sum of the ranks for the “closer” set is 3+5=8. We can then ask: If the ranks were assigned at random, how likely is it that a set of two planets would have a sum as large as 8? Again, because the sample is small, we can examine all the possible permutations, as Mosteller and Rourke do in their Table 3-1 <span class="citation" data-cites="mosteller1973sturdy">(<a href="references.html#ref-mosteller1973sturdy" role="doc-biblioref">Mosteller and Rourke 1973, 56</a>)</span> (Substitute “Closer” for “B,” “Further” for “A”). In two of the ten permutations, a sum of ranks as great as 8 is observed, so the probability of a result as great as observed happening by chance is 20 percent, using these data. (We could just as well consider the difference in mean ranks between the two groups: 8/2 - 7/3 = 10 / 6 = 1.67.)</p>
<!---
Perhaps easier to reproduce M+R table here - no-one will have that reference to
hand.
-->
<p>To illuminate the logic of this test, consider comparing the heights of two samples of trees. If sample A has the five tallest trees, and sample B has the five shortest trees, the difference in rank sums will be (6+7+8+9+10=) 40 − (1+2+3+4+5=) 15 = 25 (a difference in mean ranks of 25/5 = 5), the largest possible difference. If the groups are less sharply differentiated — for example, if sample A has #3 and sample B has #8 — the difference in rank sums will be less than the maximum of 25, as you can quickly verify.</p>
<p>The method we have just used is called a Mann-Whitney test, though that label is usually applied when the data are too many to examine all the possible permutations; in that case one conventionally uses a table prepared by formula. In the case where there are too many for a complete permutation test, our resampling algorithm is as follows (though we’ll continue with the planets example):</p>
<ol type="1">
<li>Compute the mean ranks of the two groups.</li>
<li>Calculate the difference between the means computed in step 1.</li>
<li>Create a bucket containing the ranks from 1 to the number of observations (5, in the case of the planets)</li>
<li>Shuffle the ranks.</li>
<li>Since we are working with the ranked data, we must draw without replacement, because there can only be one #3, one #7, and so on. So draw the number of ranks corresponding to the number of observations in each group — 2 “Closer” and 3 “Further.”</li>
<li>Compute the mean ranks of the two simulated groups of planets.</li>
<li>Calculate the difference between the means computed in step 6 and record.</li>
<li>Repeat steps 4 through 7 perhaps 1000 times.</li>
<li>Count how often the shuffled difference in ranks equals or exceeds the observed difference from step 2 (1.67).</li>
</ol>
<div id="nte-planet_densities" class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Note&nbsp;21.11: Notebook: Planet densities and distance
</div>
</div>
<div class="callout-body-container callout-body">
<div class="nb-links">
<p><a class="notebook-link" href="notebooks/planet_densities.ipynb">Download notebook</a> <a class="interact-button" href="./interact/lab/index.html?path=planet_densities.ipynb">Interact</a></p>
</div>
</div>
</div>
<div class="nb-start" name="planet_densities" title="Planet densities and distance">

</div>
<div class="cell" data-layout-align="center">
<div class="sourceCode cell-code" id="cb37"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> numpy <span class="im">as</span> np</span>
<span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-3"><a href="#cb37-3" aria-hidden="true" tabindex="-1"></a>rnd <span class="op">=</span> np.random.default_rng()</span>
<span id="cb37-4"><a href="#cb37-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-5"><a href="#cb37-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Steps 1 and 2.</span></span>
<span id="cb37-6"><a href="#cb37-6" aria-hidden="true" tabindex="-1"></a>actual_mean_diff <span class="op">=</span> <span class="dv">8</span> <span class="op">/</span> <span class="dv">2</span> <span class="op">-</span> <span class="dv">7</span> <span class="op">/</span> <span class="dv">3</span></span>
<span id="cb37-7"><a href="#cb37-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-8"><a href="#cb37-8" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 3</span></span>
<span id="cb37-9"><a href="#cb37-9" aria-hidden="true" tabindex="-1"></a>ranks <span class="op">=</span> np.arange(<span class="dv">1</span>, <span class="dv">6</span>)</span>
<span id="cb37-10"><a href="#cb37-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-11"><a href="#cb37-11" aria-hidden="true" tabindex="-1"></a>n <span class="op">=</span> <span class="dv">10_000</span></span>
<span id="cb37-12"><a href="#cb37-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-13"><a href="#cb37-13" aria-hidden="true" tabindex="-1"></a>mean_differences <span class="op">=</span> np.zeros(n)</span>
<span id="cb37-14"><a href="#cb37-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-15"><a href="#cb37-15" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i <span class="kw">in</span> <span class="bu">range</span>(n):</span>
<span id="cb37-16"><a href="#cb37-16" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Step 4</span></span>
<span id="cb37-17"><a href="#cb37-17" aria-hidden="true" tabindex="-1"></a>    shuffled <span class="op">=</span> rnd.permuted(ranks)</span>
<span id="cb37-18"><a href="#cb37-18" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Step 5</span></span>
<span id="cb37-19"><a href="#cb37-19" aria-hidden="true" tabindex="-1"></a>    closer <span class="op">=</span> shuffled[:<span class="dv">2</span>]  <span class="co"># First 2</span></span>
<span id="cb37-20"><a href="#cb37-20" aria-hidden="true" tabindex="-1"></a>    further <span class="op">=</span> shuffled[<span class="dv">2</span>:] <span class="co"># Last 3</span></span>
<span id="cb37-21"><a href="#cb37-21" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Step 6</span></span>
<span id="cb37-22"><a href="#cb37-22" aria-hidden="true" tabindex="-1"></a>    mean_close <span class="op">=</span> np.mean(closer)</span>
<span id="cb37-23"><a href="#cb37-23" aria-hidden="true" tabindex="-1"></a>    mean_far <span class="op">=</span> np.mean(further)</span>
<span id="cb37-24"><a href="#cb37-24" aria-hidden="true" tabindex="-1"></a>    <span class="co"># Step 7</span></span>
<span id="cb37-25"><a href="#cb37-25" aria-hidden="true" tabindex="-1"></a>    mean_differences[i] <span class="op">=</span> mean_close <span class="op">-</span> mean_far</span>
<span id="cb37-26"><a href="#cb37-26" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-27"><a href="#cb37-27" aria-hidden="true" tabindex="-1"></a><span class="co"># Step 9</span></span>
<span id="cb37-28"><a href="#cb37-28" aria-hidden="true" tabindex="-1"></a>k <span class="op">=</span> np.<span class="bu">sum</span>(mean_differences <span class="op">&gt;=</span> actual_mean_diff)</span>
<span id="cb37-29"><a href="#cb37-29" aria-hidden="true" tabindex="-1"></a>prob <span class="op">=</span> k <span class="op">/</span> n</span>
<span id="cb37-30"><a href="#cb37-30" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb37-31"><a href="#cb37-31" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(<span class="st">'Proportion of trials with mean difference &gt;= 1.67:'</span>,</span>
<span id="cb37-32"><a href="#cb37-32" aria-hidden="true" tabindex="-1"></a>      np.<span class="bu">round</span>(prob, <span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Proportion of trials with mean difference &gt;= 1.67: 0.19</code></pre>
</div>
</div>
<p>Interpretation: 19 percent of the time, random shufflings produced a difference in ranks as great as or greater than observed. Hence, on the strength of this evidence, we should <em>not</em> conclude that there is a statistically surprising difference in densities between the further planets and the closer planets.</p>
<div class="nb-end">

</div>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
End of notebook: Planet densities and distance
</div>
</div>
<div class="callout-body-container callout-body">
<p><code>planet_densities</code> starts at <a href="#nte-planet_densities" class="quarto-xref">Note&nbsp;<span>21.11</span></a>.</p>
</div>
</div>
<!---
End of planets notebook.
-->
</section>
</section>
<section id="conclusion" class="level2" data-number="21.3">
<h2 data-number="21.3" class="anchored" data-anchor-id="conclusion"><span class="header-section-number">21.3</span> Conclusion</h2>
<p>This chapter has begun the actual work of testing hypotheses. The next chapter continues with discussion of somewhat more complex problems with counted data — more complex to think about, but no more difficult to actually treat mathematically with resampling simulation. If you have understood the general logic of the procedures used up until this point, you are in command of all the necessary conceptual knowledge to construct your own tests to answer any statistical question. A lot more practice, working on a variety of problems, obviously would help. But the key elements are simple: 1) Model the real situation accurately, 2) experiment with the model, and 3) compare the results of the model with the observed results.</p>


<div id="refs" class="references csl-bib-body hanging-indent" data-entry-spacing="0" role="list" style="display: none">
<div id="ref-dixon1983introduction" class="csl-entry" role="listitem">
Dixon, Wilfrid J, and Frank J Massey Jr. 1983. <span>“Introduction to Statistical Analysis.”</span>
</div>
<div id="ref-hodges1970basic" class="csl-entry" role="listitem">
Hodges Jr, Joseph Lawson, and Erich Leo Lehmann. 1970. <em>Basic Concepts of Probability and Statistics</em>. 2nd ed. San Francisco, California: <span>H</span>olden-<span>D</span>ay, <span>I</span>nc. <a href="https://archive.org/details/basicconceptsofp0000unse_m8m9">https://archive.org/details/basicconceptsofp0000unse_m8m9</a>.
</div>
<div id="ref-kahn1989statistical" class="csl-entry" role="listitem">
Kahn, Harold A, and Christopher T Sempos. 1989. <em>Statistical Methods in Epidemiology</em>. Vol. 12. Monographs in Epidemiology and Biostatistics. New <span>Y</span>ork: Oxford University Press. <a href="https://www.google.co.uk/books/edition/Statistical_Methods_in_Epidemiology/YERYAgAAQBAJ">https://www.google.co.uk/books/edition/Statistical_Methods_in_Epidemiology/YERYAgAAQBAJ</a>.
</div>
<div id="ref-mosteller1973sturdy" class="csl-entry" role="listitem">
Mosteller, Frederick, and Robert E. K. Rourke. 1973. <em>Sturdy Statistics: Nonparametrics and Order Statistics</em>. Addison-Wesley Publishing Company.
</div>
<div id="ref-shurtleff1970framingham" class="csl-entry" role="listitem">
Shurtleff, Dewey. 1970. <span>“Some Characteristics Related to the Incidence of Cardiovascular Disease and Death: Framingham Study, 16-Year Follow-up.”</span> Section 26. Edited by William B. Kannel and Tavia Gordon. The Framingham Study: An Epidemiological Investigation of Cardiovascular Disease. Washington, D.C.: <span>U.S. Government Printing Office</span>. <a href="https://upload.wikimedia.org/wikipedia/commons/6/6d/The_Framingham_study_-_an_epidemiological_investigation_of_cardiovascular_disease_sec.26_1970_%28IA_framinghamstudye00kann_25%29.pdf">https://upload.wikimedia.org/wikipedia/commons/6/6d/The_Framingham_study_-_an_epidemiological_investigation_of_cardiovascular_disease_sec.26_1970_%28IA_framinghamstudye00kann_25%29.pdf</a>.
</div>
<div id="ref-wonnacott1990introductory" class="csl-entry" role="listitem">
Wonnacott, Thomas H, and Ronald J Wonnacott. 1990. <em>Introductory Statistics</em>. 5th ed. New York: John Wiley &amp; Sons.
</div>
</div>
</section>
<section id="footnotes" class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr>
<ol>
<li id="fn1"><p>If you are very knowledgeable, you may do some in-between figuring (with what is known as “Bayesian analysis”), but leave this alone unless you know well what you are doing.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
<li id="fn2"><p>The data for this example are based on Dixon and Massey <span class="citation" data-cites="dixon1983introduction">(<a href="references.html#ref-dixon1983introduction" role="doc-biblioref">1983, 124</a>)</span>, who offer an orthodox method of handling the problem with a t-test.<a href="#fnref2" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
</ol>
</section>

</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
  // Mirror the given element's `data-mode` attribute onto <body>: "dark"
  // selects the `quarto-dark` class, anything else selects `quarto-light`;
  // the opposite class is removed.
  const toggleBodyColorMode = (bsSheetEl) => {
    const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
    const [enable, disable] = isDark
      ? ["quarto-dark", "quarto-light"]
      : ["quarto-light", "quarto-dark"];
    const { classList } = window.document.querySelector("body");
    classList.add(enable);
    classList.remove(disable);
  }
  // Apply the body colour-mode class from the `link#quarto-bootstrap`
  // stylesheet element; does nothing when that element is absent.
  const toggleBodyColorPrimary = () => {
    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
    if (!bsSheetEl) {
      return;
    }
    toggleBodyColorMode(bsSheetEl);
  }
  // Run once as soon as the DOM is ready.
  toggleBodyColorPrimary();
  const icon = "";
  // Add AnchorJS links, placed to the right, on every `.anchored` element.
  const anchorJS = new window.AnchorJS();
  anchorJS.options = { placement: 'right', icon };
  anchorJS.add('.anchored');
  // True when any of the element's classes starts with "code-annotation-".
  const isCodeAnnotation = (el) =>
    Array.from(el.classList).some((clz) => clz.startsWith('code-annotation-'));
  // Success callback for the copy buttons: flashes a "Copied!" state on the
  // triggering button for one second, then restores its previous title.
  const onCopySuccess = function(e) {
    const copyBtn = e.trigger;
    // drop focus so the button does not stay focused after the click
    copyBtn.blur();
    // show the "checked" state and swap the title
    copyBtn.classList.add('code-copy-button-checked');
    const originalTitle = copyBtn.getAttribute("title");
    copyBtn.setAttribute("title", "Copied!");
    // the Bootstrap tooltip is only shown when the library is loaded
    let tip;
    if (window.bootstrap) {
      const tooltipAttrs = {
        "data-bs-toggle": "tooltip",
        "data-bs-placement": "left",
        "data-bs-title": "Copied!",
      };
      for (const [attr, value] of Object.entries(tooltipAttrs)) {
        copyBtn.setAttribute(attr, value);
      }
      tip = new bootstrap.Tooltip(copyBtn, {
        trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8],
      });
      tip.show();
    }
    // undo everything above after one second
    const restore = () => {
      if (tip) {
        tip.hide();
        for (const attr of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
          copyBtn.removeAttribute(attr);
        }
      }
      copyBtn.setAttribute("title", originalTitle);
      copyBtn.classList.remove('code-copy-button-checked');
    };
    setTimeout(restore, 1000);
    // clear code selection
    e.clearSelection();
  }
  // Return the text of the element preceding `trigger`, with any
  // code-annotation children stripped out.
  const getTextToCopy = function(trigger) {
      // work on a deep clone so the rendered element is left untouched
      const codeEl = trigger.previousElementSibling.cloneNode(true);
      // `children` is a live collection: removing nodes while iterating it
      // directly skips the sibling after each removed node, so iterate over
      // a snapshot instead.
      for (const childEl of Array.from(codeEl.children)) {
        if (isCodeAnnotation(childEl)) {
          childEl.remove();
        }
      }
      return codeEl.innerText;
  }
  // Wire ClipboardJS to every copy button that is not inside a Quarto modal.
  const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
    text: getTextToCopy
  });
  clipboard.on('success', onCopySuccess);
  const sourceModalEl = window.document.getElementById('quarto-embedded-source-code-modal');
  if (sourceModalEl) {
    // For code content inside modals, clipBoardJS needs to be initialized with a container option
    // TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
    const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
      text: getTextToCopy,
      container: sourceModalEl
    });
    clipboardModal.on('success', onCopySuccess);
  }
    // An href counts as internal when it contains "/<current host>/", points
    // at localhost, or is a mailto: link.
    const localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
    const mailtoRegex = /^mailto:/;
    const filterRegex = new RegExp('/' + window.location.host + '/');
    const isInternal = (href) =>
      [filterRegex, localhostRegex, mailtoRegex].some((pattern) => pattern.test(href));
    // Inspect non-navigation links and adorn them if external
    const links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
    for (const link of links) {
      // undo the damage that might have been done by quarto-nav.js in the case of
      // links that we want to consider external
      if (!isInternal(link.href) && link.dataset.originalHref !== undefined) {
        link.href = link.dataset.originalHref;
      }
    }
  // Attach a Quarto-themed tippy popup to `el`. Each of `contentFn`,
  // `onTriggerFn` and `onUntriggerFn` is added to the tippy config only
  // when a truthy value is supplied.
  function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
    const config = {
      allowHTML: true,
      maxWidth: 500,
      delay: 100,
      arrow: false,
      // render the popup inside the parent of the element it is given
      appendTo: (target) => target.parentElement,
      interactive: true,
      interactiveBorder: 10,
      theme: 'quarto',
      placement: 'bottom-start',
    };
    const optionalHooks = {
      content: contentFn,
      onTrigger: onTriggerFn,
      onUntrigger: onUntriggerFn,
    };
    for (const [key, hook] of Object.entries(optionalHooks)) {
      if (hook) {
        config[key] = hook;
      }
    }
    window.tippy(el, config);
  }
  // Footnote references: the popup shows the inner HTML of the element the
  // reference points at, or nothing when that element cannot be found.
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  noterefs.forEach((ref) => {
    const footnoteHtml = () => {
      // use id or data attribute instead here
      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      // keep the raw value when it does not parse as an absolute URL
      try { href = new URL(href).hash; } catch {}
      const note = window.document.getElementById(href.replace(/^#\/?/, ""));
      return note ? note.innerHTML : "";
    };
    tippyHover(ref, footnoteHtml);
  });
  const xrefs = window.document.querySelectorAll('a.quarto-xref');
  // Build the popup HTML for a cross-reference target `note`.
  // `id` is the target's id, or null for a whole-page (chapter) reference.
  const processXRef = (id, note) => {
    // Run Quarto's math typesetting on `el` when it is available
    const typeset = (el) => {
      if (window.Quarto?.typesetMath) {
        window.Quarto.typesetMath(el);
      }
    };
    // Strip column container classes
    const stripColumnClz = (el) => {
      el.classList.remove("page-full", "page-columns");
      if (el.children) {
        Array.from(el.children).forEach(stripColumnClz);
      }
    };
    stripColumnClz(note);

    const isSection = id === null || id.startsWith('sec-');
    if (!isSection) {
      // Remove any anchor links if they are present
      note.querySelector('a.anchorjs-link')?.remove();
      typeset(note);
      // TODO in 1.5, we should make sure this works without a callout special case
      return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
    }

    // Special case sections, only their first couple elements
    const kids = note.children;
    if (!(kids && kids.length > 2)) {
      typeset(note);
      return note.innerHTML;
    }
    const container = document.createElement("div");
    container.appendChild(kids[0].cloneNode(true));
    // after the first child, add the first one that is not an empty paragraph
    const firstBody = Array.from(kids)
      .slice(1)
      .find((child) => !(child.tagName === "P" && child.innerText === ""));
    if (firstBody) {
      container.appendChild(firstBody.cloneNode(true));
    }
    typeset(container);
    return container.innerHTML;
  }
  // Cross-references: the popup content is resolved when the popup is
  // triggered, either from this document or by fetching the linked page.
  xrefs.forEach((xref) => {
    const loadPreview = (instance) => {
      // keep the popup disabled until its content has been resolved
      instance.disable();
      const reveal = () => {
        instance.enable();
        instance.show();
      };
      // fetch `target` and parse the response body as an HTML document
      const fetchDocument = (target) =>
        fetch(target)
          .then((res) => res.text())
          .then((text) => new DOMParser().parseFromString(text, "text/html"));

      const url = xref.getAttribute('href');
      let hash;
      if (url.startsWith('#')) {
        hash = url;
      } else {
        // hash stays undefined when the href is not an absolute URL
        try { hash = new URL(url).hash; } catch {}
      }

      if (!hash) {
        // See if we can fetch a full url (with no hash to target)
        // This is a special case and we should probably do some content thinning / targeting
        fetchDocument(url)
          .then((htmlDoc) => {
            const main = htmlDoc.querySelector('main.content');
            if (main === null) {
              return;
            }
            // This should only happen for chapter cross references
            // (since there is no id in the URL)
            // remove the first header
            if (main.children.length > 0 && main.children[0].tagName === "HEADER") {
              main.children[0].remove();
            }
            instance.setContent(processXRef(null, main));
          })
          .finally(reveal);
        return;
      }

      const id = hash.replace(/^#\/?/, "");
      const localNote = window.document.getElementById(id);
      if (localNote !== null) {
        // target is in this document: render from a clone
        try {
          instance.setContent(processXRef(id, localNote.cloneNode(true)));
        } finally {
          reveal();
        }
        return;
      }

      // See if we can fetch this
      fetchDocument(url.split('#')[0])
        .then((htmlDoc) => {
          const remoteNote = htmlDoc.getElementById(id);
          if (remoteNote !== null) {
            instance.setContent(processXRef(id, remoteNote));
          }
        })
        .finally(reveal);
    };
    tippyHover(xref, undefined, loadPreview, function(instance) {
    });
  });
      // Annotation element passed to the most recent selectCodeLines call;
      // reset to undefined by unselectCodeLines.
      let selectedAnnoteEl;
      // CSS selector for the span carrying the given code-cell / annotation pair.
      const selectorForAnnotation = (cell, annotation) =>
        `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
      // Highlight the code lines that belong to an annotation by positioning
      // two absolutely-placed overlay divs: one over the listing and one in
      // the cell's annotation gutter. Records the annotation as selected.
      const selectCodeLines = (annoteEl) => {
        const doc = window.document;
        const targetCell = annoteEl.getAttribute("data-target-cell");
        const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
        const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
        // "data-code-lines" is a comma-separated list; line element ids are "<cell>-<line>"
        const lineIds = annoteSpan
          .getAttribute("data-code-lines")
          .split(",")
          .map((line) => `${targetCell}-${line}`);
        if (lineIds.length === 0) {
          return;
        }

        // Return the overlay div with this id, creating and attaching it on first use
        const ensureOverlay = (overlayId, attach) => {
          let overlay = doc.getElementById(overlayId);
          if (overlay === null) {
            overlay = doc.createElement("div");
            overlay.setAttribute("id", overlayId);
            overlay.style.position = 'absolute';
            attach(overlay);
          }
          return overlay;
        };

        // span from the top of the first line to the bottom of the last one
        const firstEl = doc.getElementById(lineIds[0]);
        const top = firstEl.offsetTop;
        let height = firstEl.offsetHeight;
        const parent = firstEl.parentElement.parentElement;
        if (lineIds.length > 1) {
          const lastEl = doc.getElementById(lineIds[lineIds.length - 1]);
          const bottom = lastEl.offsetTop + lastEl.offsetHeight;
          height = bottom - top;
        }

        if (top !== null && height !== null && parent !== null) {
          // cook up a div (if necessary) and position it
          const div = ensureOverlay("code-annotation-line-highlight", (el) => {
            parent.appendChild(el);
          });
          div.style.top = top - 2 + "px";
          div.style.height = height + 4 + "px";
          div.style.left = 0;
          const gutterDiv = ensureOverlay("code-annotation-line-highlight-gutter", (el) => {
            const codeCell = doc.getElementById(targetCell);
            const gutter = codeCell.querySelector('.code-annotation-gutter');
            gutter.appendChild(el);
          });
          gutterDiv.style.top = top - 2 + "px";
          gutterDiv.style.height = height + 4 + "px";
        }
        selectedAnnoteEl = annoteEl;
      };
      // Remove both highlight overlay divs (when present) and clear the
      // record of the selected annotation.
      const unselectCodeLines = () => {
        const overlayIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
        for (const overlayId of overlayIds) {
          window.document.getElementById(overlayId)?.remove();
        }
        selectedAnnoteEl = undefined;
      };
        // Re-position the line highlight of the selected annotation (if any)
        // when the window is resized; calls are rate-limited by throttle().
        // The former `elRect = undefined;` assignment was dropped: `elRect`
        // is never declared, so it only created an accidental global.
    window.addEventListener(
      "resize",
      throttle(() => {
        if (selectedAnnoteEl) {
          selectCodeLines(selectedAnnoteEl);
        }
      }, 10)
    );
    // Wrap `fn` so that the first call runs immediately and every later call
    // is deferred: each deferred call cancels the previous pending one, and
    // only the last runs, `ms` milliseconds after it was made. Once a
    // deferred call has run, the next call runs immediately again.
    function throttle(fn, ms) {
      let engaged = false;
      let pending;
      return (...args) => {
        if (engaged) {
          // all later calls get deferred; cancel the one already queued
          if (pending) clearTimeout(pending);
          pending = setTimeout(() => {
            fn.apply(this, args);
            engaged = false;
            pending = false;
          }, ms);
          return;
        }
        // first call gets through
        fn.apply(this, args);
        engaged = true;
      };
    }
      // Attach click handler to the DT: clicking an annotation selects its
      // code lines, clicking the selected annotation again clears them.
      const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
      annoteDls.forEach((annoteDlNode) => {
        annoteDlNode.addEventListener('click', ({ target: clickedEl }) => {
          if (clickedEl === selectedAnnoteEl) {
            // Unselect the line
            unselectCodeLines();
            clickedEl.classList.remove('code-annotation-active');
            return;
          }
          unselectCodeLines();
          // only one annotation is marked active at a time
          const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
          if (activeEl) {
            activeEl.classList.remove('code-annotation-active');
          }
          selectCodeLines(clickedEl);
          clickedEl.classList.add('code-annotation-active');
        });
      });
  // Walk up from `el` until an element is found whose parent carries a
  // non-empty `data-cites` attribute. Returns that element together with the
  // space-separated cite keys, or undefined when no ancestor has one.
  const findCites = (el) => {
    let current = el;
    while (current.parentElement) {
      const cites = current.parentElement.dataset.cites;
      if (cites) {
        return {
          el: current,
          cites: cites.split(' ')
        };
      }
      current = current.parentElement;
    }
    return undefined;
  };
  // Citations: the popup lists a copy of the `#ref-<key>` bibliography entry
  // for each cited key (an empty entry when no such element exists).
  const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  bibliorefs.forEach((ref) => {
    const citeInfo = findCites(ref);
    if (!citeInfo) {
      return;
    }
    tippyHover(citeInfo.el, function() {
      const popup = window.document.createElement('div');
      for (const cite of citeInfo.cites) {
        const citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        const biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        popup.appendChild(citeDiv);
      }
      return popup.innerHTML;
    });
  });
});
</script>
<nav class="page-navigation">
  <div class="nav-page nav-page-previous">
      <a href="./framing_questions.html" class="pagination-link" aria-label="Framing Statistical Questions">
        <i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">20</span>&nbsp; <span class="chapter-title">Framing Statistical Questions</span></span>
      </a>          
  </div>
  <div class="nav-page nav-page-next">
      <a href="./significance.html" class="pagination-link" aria-label="The Concept of Statistical Significance in Testing Hypotheses">
        <span class="nav-page-text"><span class="chapter-number">22</span>&nbsp; <span class="chapter-title">The Concept of Statistical Significance in Testing Hypotheses</span></span> <i class="bi bi-arrow-right-short"></i>
      </a>
  </div>
</nav>
</div> <!-- /content -->


</body></html>