-
Notifications
You must be signed in to change notification settings - Fork 0
/
what_is_probability.html
1181 lines (1148 loc) · 109 KB
/
what_is_probability.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.6.1">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>3 What is probability? – Resampling statistics</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for citations */
div.csl-bib-body { }
div.csl-entry {
clear: both;
margin-bottom: 0em;
}
.hanging-indent div.csl-entry {
margin-left:2em;
text-indent:-2em;
}
div.csl-left-margin {
min-width:2em;
float:left;
}
div.csl-right-inline {
margin-left:2em;
padding-left:1em;
}
div.csl-indent {
margin-left: 2em;
}</style>
<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./about_technology.html" rel="next">
<link href="./resampling_method.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "sidebar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "start",
"type": "textbox",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<script type="text/javascript">
// Add kableExtra styling classes to every table once the DOM is ready.
// NOTE(review): the original used jQuery's $(document).ready(), but no jQuery
// <script> tag is loaded by this page, so `$` would be undefined at runtime.
// This vanilla-DOM version has no external dependency.
document.addEventListener('DOMContentLoaded', () => {
  for (const table of document.querySelectorAll('table')) {
    table.classList.add('lightable-paper', 'lightable-striped', 'lightable-hover');
  }
});
</script>
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<script type="text/javascript">
// Typeset math inside `el`, using whichever math renderer the page loaded.
// Prefers MathJax; falls back to KaTeX; does nothing if neither is present.
const typesetMath = (el) => {
  if (window.MathJax) {
    // MathJax Typeset
    window.MathJax.typeset([el]);
  } else if (window.katex) {
    // KaTeX Render: typeset each span.math element in place.
    const mathElements = el.getElementsByClassName("math");
    // KaTeX's `macros` option expects an object mapping macro names to
    // expansions (the original passed an array, which only works by accident).
    const macros = {};
    for (let i = 0; i < mathElements.length; i++) {
      const texText = mathElements[i].firstChild;
      if (mathElements[i].tagName === "SPAN") {
        window.katex.render(texText.data, mathElements[i], {
          // Display-mode math carries the 'display' class.
          displayMode: mathElements[i].classList.contains('display'),
          throwOnError: false,
          macros: macros,
          fleqn: false
        });
      }
    }
  }
};
// Expose the helper for other Quarto scripts on the page.
window.Quarto = {
  typesetMath
};
</script>
<link rel="stylesheet" href="style.css">
<link rel="stylesheet" href="font-awesome.min.css">
</head>
<body class="nav-sidebar floating">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<nav class="quarto-secondary-nav">
<div class="container-fluid d-flex">
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./what_is_probability.html"><span class="chapter-number">3</span> <span class="chapter-title">What is probability?</span></a></li></ol></nav>
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
<button type="button" class="btn quarto-search-button" aria-label="Search" onclick="window.quartoOpenSearch();">
<i class="bi bi-search"></i>
</button>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
<div class="pt-lg-2 mt-2 text-left sidebar-header">
<div class="sidebar-title mb-0 py-0">
<a href="./">Resampling statistics</a>
</div>
</div>
<div class="mt-2 flex-shrink-0 align-items-center">
<div class="sidebar-search">
<div id="quarto-search" class="" title="Search"></div>
</div>
</div>
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Python version</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./preface_third.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Preface to the third edition</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./preface_second.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Preface to the second edition</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./intro.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">1</span> <span class="chapter-title">Introduction</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./resampling_method.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">The resampling method</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./what_is_probability.html" class="sidebar-item-text sidebar-link active">
<span class="menu-text"><span class="chapter-number">3</span> <span class="chapter-title">What is probability?</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./about_technology.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">4</span> <span class="chapter-title">Introducing Python and the Jupyter notebook</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./resampling_with_code.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">5</span> <span class="chapter-title">Resampling with code</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./resampling_with_code2.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">6</span> <span class="chapter-title">More resampling with code</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./sampling_tools.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">7</span> <span class="chapter-title">Tools for samples and sampling</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./probability_theory_1a.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">8</span> <span class="chapter-title">Probability Theory, Part 1</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./probability_theory_1b.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">9</span> <span class="chapter-title">Probability Theory Part I (continued)</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./more_sampling_tools.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">10</span> <span class="chapter-title">Two puzzles and more tools</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./probability_theory_2_compound.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">11</span> <span class="chapter-title">Probability Theory, Part 2: Compound Probability</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./probability_theory_3.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">12</span> <span class="chapter-title">Probability Theory, Part 3</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./probability_theory_4_finite.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">13</span> <span class="chapter-title">Probability Theory, Part 4: Estimating Probabilities from Finite Universes</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./sampling_variability.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">14</span> <span class="chapter-title">On Variability in Sampling</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./monte_carlo.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">15</span> <span class="chapter-title">The Procedures of Monte Carlo Simulation (and Resampling)</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./standard_scores.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">16</span> <span class="chapter-title">Ranks, Quantiles and Standard Scores</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./inference_ideas.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">17</span> <span class="chapter-title">The Basic Ideas in Statistical Inference</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./inference_intro.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">18</span> <span class="chapter-title">Introduction to Statistical Inference</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./point_estimation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">19</span> <span class="chapter-title">Point Estimation</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./framing_questions.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">20</span> <span class="chapter-title">Framing Statistical Questions</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./testing_counts_1.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">21</span> <span class="chapter-title">Hypothesis-Testing with Counted Data, Part 1</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./significance.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">22</span> <span class="chapter-title">The Concept of Statistical Significance in Testing Hypotheses</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./testing_counts_2.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">23</span> <span class="chapter-title">The Statistics of Hypothesis-Testing with Counted Data, Part 2</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./testing_measured.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">24</span> <span class="chapter-title">The Statistics of Hypothesis-Testing With Measured Data</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./testing_procedures.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">25</span> <span class="chapter-title">General Procedures for Testing Hypotheses</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./confidence_1.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">26</span> <span class="chapter-title">Confidence Intervals, Part 1: Assessing the Accuracy of Samples</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./confidence_2.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">27</span> <span class="chapter-title">Confidence Intervals, Part 2: The Two Approaches to Estimating Confidence Intervals</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./reliability_average.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">28</span> <span class="chapter-title">Some Last Words About the Reliability of Sample Averages</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./correlation_causation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">29</span> <span class="chapter-title">Correlation and Causation</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./how_big_sample.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">30</span> <span class="chapter-title">How Large a Sample?</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./bayes_simulation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">31</span> <span class="chapter-title">Bayesian Analysis by Simulation</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./references.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">References</span></a>
</div>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
<span class="menu-text">Appendices</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./exercise_solutions.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">A</span> <span class="chapter-title">Exercise Solutions</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./technical_note.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">B</span> <span class="chapter-title">Technical Note to the Professional Reader</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./acknowlegements.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">C</span> <span class="chapter-title">Acknowledgements</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./code_topics.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">D</span> <span class="chapter-title">Code topics</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./errors_suggestions.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">E</span> <span class="chapter-title">Errors and suggestions</span></span></a>
</div>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction"><span class="header-section-number">3.1</span> Introduction</a></li>
<li><a href="#the-meaning-of-probability" id="toc-the-meaning-of-probability" class="nav-link" data-scroll-target="#the-meaning-of-probability"><span class="header-section-number">3.2</span> The “Meaning” of “Probability”</a></li>
<li><a href="#the-nature-and-meaning-of-the-concept-of-probability" id="toc-the-nature-and-meaning-of-the-concept-of-probability" class="nav-link" data-scroll-target="#the-nature-and-meaning-of-the-concept-of-probability"><span class="header-section-number">3.3</span> The nature and meaning of the concept of probability</a></li>
<li><a href="#back-to-proxies" id="toc-back-to-proxies" class="nav-link" data-scroll-target="#back-to-proxies"><span class="header-section-number">3.4</span> Back to Proxies</a></li>
<li><a href="#sec-probability-ways" id="toc-sec-probability-ways" class="nav-link" data-scroll-target="#sec-probability-ways"><span class="header-section-number">3.5</span> The various ways of estimating probabilities</a></li>
<li><a href="#the-relationship-of-probability-to-other-magnitudes" id="toc-the-relationship-of-probability-to-other-magnitudes" class="nav-link" data-scroll-target="#the-relationship-of-probability-to-other-magnitudes"><span class="header-section-number">3.6</span> The relationship of probability to other magnitudes</a></li>
<li><a href="#what-is-chance" id="toc-what-is-chance" class="nav-link" data-scroll-target="#what-is-chance"><span class="header-section-number">3.7</span> What is “chance”?</a></li>
<li><a href="#sec-what-is-chance" id="toc-sec-what-is-chance" class="nav-link" data-scroll-target="#sec-what-is-chance"><span class="header-section-number">3.8</span> What Do We Mean by “Random”?</a></li>
<li><a href="#randomness-from-the-computer" id="toc-randomness-from-the-computer" class="nav-link" data-scroll-target="#randomness-from-the-computer"><span class="header-section-number">3.9</span> Randomness from the computer</a></li>
<li><a href="#the-philosophers-dispute-about-the-concept-of-probability" id="toc-the-philosophers-dispute-about-the-concept-of-probability" class="nav-link" data-scroll-target="#the-philosophers-dispute-about-the-concept-of-probability"><span class="header-section-number">3.10</span> The philosophers’ dispute about the concept of probability</a></li>
<li><a href="#the-relationship-of-probability-to-the-concept-of-resampling" id="toc-the-relationship-of-probability-to-the-concept-of-resampling" class="nav-link" data-scroll-target="#the-relationship-of-probability-to-the-concept-of-resampling"><span class="header-section-number">3.11</span> The relationship of probability to the concept of resampling</a></li>
<li><a href="#conclusion" id="toc-conclusion" class="nav-link" data-scroll-target="#conclusion"><span class="header-section-number">3.12</span> Conclusion</a></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title"><span id="sec-what-is-probability" class="quarto-section-identifier"><span class="chapter-number">3</span> <span class="chapter-title">What is probability?</span></span></h1>
</div>
<div class="quarto-title-meta">
</div>
</header>
<blockquote class="blockquote">
<p>“Uncertainty, in the presence of vivid hopes and fears, is painful, but must be endured if we wish to live without the support of comforting fairy tales.” — Bertrand Russell <span class="citation" data-cites="russell1945history">(<a href="references.html#ref-russell1945history" role="doc-biblioref">1945</a> p. <em>xiv</em>)</span>.</p>
</blockquote>
<section id="introduction" class="level2" data-number="3.1">
<h2 data-number="3.1" class="anchored" data-anchor-id="introduction"><span class="header-section-number">3.1</span> Introduction</h2>
<p>The central concept for dealing with uncertainty is probability. Hence we must inquire into the “meaning” of the term probability. (The term “meaning” is in quotes because it can be a confusing word.)</p>
<p>You have been using the notion of probability all your life when drawing conclusions about what you expect to happen, and in reaching decisions in your public and personal lives.</p>
<p>You wonder: Will the kick from the 45 yard line go through the uprights? How much oil can you expect from the next well you drill, and what value should you assign to that prospect? Will you make money if you invest in tech stocks for the medium term, or should you spread your investments across the stock market? Will the next Space-X launch end in disaster? Your answers to these questions rest on the probabilities you estimate.</p>
<p>And you act on the basis of probabilities: You pay extra for a low-interest loan, if you think that interest rates are going to go up. You bet heavily on a poker hand if there is a high probability that you have the best hand. A hospital decides not to buy another ambulance when the administrator judges that there is a low probability that all the other ambulances will ever be in use at once. NASA decides whether or not to send off the space shuttle this morning as scheduled.</p>
<p>The idea of probability is essential when we reason about uncertainty, and so this chapter discusses what is meant by such key terms as “probability,” “chance”, “sample,” and “universe.” It discusses the nature and the usefulness of the concept of probability as used in this book, and it touches on the source of basic estimates of probability that are the raw material of statistical inferences.</p>
</section>
<section id="the-meaning-of-probability" class="level2" data-number="3.2">
<h2 data-number="3.2" class="anchored" data-anchor-id="the-meaning-of-probability"><span class="header-section-number">3.2</span> The “Meaning” of “Probability”</h2>
<p>Probability is difficult to define <span class="citation" data-cites="feller1968introduction">(<a href="references.html#ref-feller1968introduction" role="doc-biblioref">Feller 1968</a>)</span>, but here is a useful informal starting point:</p>
<blockquote class="blockquote">
<p>A probability is a number from 0 through 1 that reflects how likely it is that a particular event will happen.</p>
</blockquote>
<p>Any particular stated probability is an assertion that indicates how likely you believe it is that an event will occur.</p>
<p>If you give an event a probability of 0 you mean that you are certain it will <em>not</em> happen. If you give probability 1 to an event, you mean you are certain that it <em>will</em> happen. For example, if I give you one card from a deck that you know contains only the standard 52 cards — before you look at the card, you can give probability 0 to the card being a joker, because you are certain the pack does not contain any joker cards. If I then select only the 13 spades from that deck, and give you a card from that selection, you will say there is probability 1 that the card is a black card, because all the spades are black cards.</p>
<p>A probability estimate of .2 indicates that you think there is twice as great a chance of the event happening as if you had estimated a probability of .1. This is the rock-bottom interpretation of the term “probability,” and the heart of the concept.</p>
<div class="callout callout-style-default callout-note callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Expressing probability
</div>
</div>
<div class="callout-body-container callout-body">
<p>A given probability may be expressed in terms of probability, odds, or chances, and I shall use all three terms to help familiarize you with them.</p>
<p>Let us say we think there is a probability of 0.1 that it will rain tomorrow.</p>
<p>We can restate this probability by saying there is a one in 10 <em>chance</em> that it will rain tomorrow (<span class="math inline">\(1 / 10 = 0.1\)</span>). Giving the <em>chances</em> as 1 in 10, or 2 in 20, or 10 in 100, is the same as saying the probability is 0.1.</p>
<p>If we multiply the probability by 100 we get the <em>percent chance</em> — another way of saying the probability. Here we have a <span class="math inline">\(0.1 * 100\)</span> = 10% chance of rain. We could also say that the chances of rain are 10 in 100.</p>
<p><em>Odds</em> are still another way of expressing probability. Here we think of our outcome of interest — a day <em>with rain</em> and compare it to our outcome that is not of interest — a day <em>without rain</em>. Our probability of 0.1 means that we expect one day <em>with rain</em> in every 10 days, and therefore, one day <em>with rain</em> for every nine days <em>without rain</em>. We can express the 0.1 probability of rain as <em>odds</em> 1 to 9 (of a rainy day), or 9 to 1 <em>against</em> a rainy day.</p>
<p>“Likelihood” is a term related to “probability” but is not a complete synonym for it — it has a specific and technical meaning in probability and statistics.</p>
</div>
</div>
<p>The idea of probability arises when you are not sure about what will happen in an uncertain situation. For example, you may lack information and therefore can only make an estimate. If someone asks you your name, you do not use the concept of probability to answer; you know the answer to a very high degree of surety. To be sure, there is some chance that you do not know your own name, but for all practical purposes you can be quite sure of the answer. If someone asks you who will win tomorrow’s baseball game, however, there is a considerable chance that you will be wrong no matter what you say. Whenever there is a reasonable chance that your prediction will be wrong, the concept of probability can help you.</p>
<p>The concept of probability helps you to answer the question, “How likely is it that…?” The purpose of the study of probability and statistics is to help you make sound appraisals of statements about the future, and good decisions based upon those appraisals. The concept of probability is especially useful when you have a sample from a larger set of data — a “universe” — and you want to know the probability of various degrees of likeness between the sample and the universe. (The universe of events you are sampling from is also called the “population,” a concept to be discussed below.) Perhaps the universe of your study is all high school graduates in 2018. You might then want to know, for example, the probability that the universe’s average SAT (university entrance) score will not differ from your sample’s average SAT by more than some arbitrary number of SAT points — say, ten points.</p>
<p>We have said that a probability statement is about the future. Well, usually. Occasionally you might state a probability about your future knowledge of past events — that is, “I think I’ll find out that…” — or even about the unknown past. (Historians use probabilities to measure their uncertainty about whether events occurred in the past, and the courts do, too, though the courts hesitate to say so explicitly.)</p>
<p>Sometimes one knows a probability, such as in the case of a gambler playing black on an honest roulette wheel, or an insurance company issuing a policy on an event with which it has had a lot of experience, such as a life insurance policy. But often one does not <em>know</em> the probability of a future event. Therefore, our concept of probability must include situations where extensive data are not available.</p>
<p>All of the many techniques used to estimate probabilities should be thought of as <em>proxies</em> for the actual probability. For example, if Mission Control at Space Central simulates what should and probably will happen in space if a valve is turned aboard a space craft just now being built, the test result on the ground is a proxy for the real probability of what will happen when the crew turn the valve in the planned mission.</p>
<p>In some cases, it is difficult to conceive of <em>any</em> data that can serve as a proxy. For example, the director of the CIA, Robert Gates, said in 1993 “that in May 1989, the CIA reported that the problems in the Soviet Union were so serious and the situation so volatile that Gorbachev had only a 50-50 chance of surviving the next three to four years unless he retreated from his reform policies” (<em>The Washington Post</em>, January 17, 1993, p. A42). Can such a statement be based on solid enough data to be more than a crude guess?</p>
<p>The conceptual probability in any specific situation is <em>an interpretation of all the evidence that is then available</em>. For example, a wise biomedical worker’s estimate of the chance that a given therapy will have a positive effect on a sick patient should be an interpretation of the results of not just one study in isolation, but of the results of that study plus everything else that is known about the disease and the therapy. A wise policymaker in business, government, or the military will base a probability estimate on a wide variety of information and knowledge. The same is even true of an insurance underwriter who bases a life-insurance or shipping-insurance rate not only on extensive tables of long-time experience but also on recent knowledge of other kinds. Each situation asks us to make a choice of the best method of estimating a probability — whether that estimate is <em>objective</em> — from a frequency series — or <em>subjective</em>, from the distillation of other experience.</p>
</section>
<section id="the-nature-and-meaning-of-the-concept-of-probability" class="level2" data-number="3.3">
<h2 data-number="3.3" class="anchored" data-anchor-id="the-nature-and-meaning-of-the-concept-of-probability"><span class="header-section-number">3.3</span> The nature and meaning of the concept of probability</h2>
<p>It is confusing and unnecessary to inquire what probability “really” is. (Indeed, the terms “really” and “is,” alone or in combination, are major sources of confusion in statistics and in other logical and scientific discussions, and it is often wise to avoid their use.) Various concepts of probability — which correspond to various common definitions of the term — are useful in particular contexts. This book contains many examples of the use of probability. Work with them will gradually develop a sound understanding of the concept.</p>
<p>There are two major concepts and points of view about probability — <em>frequency</em> and <em>degrees of belief</em>. Each is useful in some situations but not in others. Though they may seem incompatible in principle, there almost never is confusion about which is appropriate in a given situation.</p>
<ol type="1">
<li><p><em>Frequency</em>: The probability of an event can be said to be the proportion of times that the event has taken place in the past, usually based on a long series of trials. Insurance companies use this when they estimate the probability that a thirty-five-year-old teacher will die during a period for which he wants to buy an insurance policy. (Notice this shortcoming: Sometimes you must bet upon events that have never or only infrequently taken place before, and so you cannot reasonably reckon the proportion of times they occurred one way or the other in the past.)</p></li>
<li><p><em>Degree of belief</em>: The probability that an event will take place or that a statement is true can be said to correspond to the odds at which you would bet that the event will take place. (Notice a shortcoming of this concept: You might be willing to accept a five-dollar bet at 2-1 odds that your team will win the game, but you might be unwilling to bet a hundred dollars at the same odds.)</p></li>
</ol>
<p>See <span class="citation" data-cites="barnett1982comparative">(<a href="references.html#ref-barnett1982comparative" role="doc-biblioref">Barnett 1982, chap. 3</a>)</span> for an in-depth discussion of different approaches to probability.</p>
<p>The connection between gambling and immorality or vice troubles some people about gambling examples. On the other hand, the immediacy and consequences of the decisions that the gambler has to make give the subject a special tang. There are several reasons why statistics use so many gambling examples — and especially tossing coins, throwing dice, and playing cards:</p>
<ol type="1">
<li><em>Historical</em>: The theory of probability began with gambling examples of dice analyzed by Cardano, Galileo, and then by Pascal and Fermat.</li>
<li><em>Generality</em>: These examples are not related to any particular walk of life, and therefore they can be generalized to applications in any walk of life. Students in any field — business, medicine, science — can feel equally at home with gambling examples.</li>
<li><em>Sharpness</em>: These examples are particularly stark, and unencumbered by the baggage of particular walks of life or special uses.</li>
<li><em>Universality</em>: Many other texts use these same examples, and therefore the use of them connects up this book with the main body of writing about probability and statistics.</li>
</ol>
<p>Often we’ll begin with a gambling example and then consider an example in one of the professional fields — such as business and other decision-making activities, biostatistics and medicine, social science and natural science — and everyday living. People in one field often can benefit from examples in others; for example, medical students should understand the need for business decision-making in terms of medical practice, as well as the biostatistical examples. And social scientists should understand the decision-making aspects of statistics if they have any interest in the use of their work in public policy.</p>
</section>
<section id="back-to-proxies" class="level2" data-number="3.4">
<h2 data-number="3.4" class="anchored" data-anchor-id="back-to-proxies"><span class="header-section-number">3.4</span> Back to Proxies</h2>
<p>Example of a proxy: The “probability risk assessments” (PRAs) that are made for the chances of failures of nuclear power plants are based, not on long experience or even on laboratory experiment, but rather on theorizing of various kinds — using pieces of prior experience wherever possible, of course. A PRA can cost a nuclear facility many millions of dollars.</p>
<p>Another example: If a manager of a high-street store looks at the sales of a particular brand of smart watches in the last two Decembers, and on that basis guesses how likely it is that she will run out of stock if she orders 200 smart watches, then the last two years’ experience is serving as a proxy for future experience. If a sales manager just “intuits” that the odds are 3 to 1 (a probability of .75) that the main local competitor will not meet a price cut, then all her past experience summed into her intuition is a proxy for the probability that it will really happen. Whether any proxy is a good or bad one depends on the wisdom of the person choosing the proxy and making the probability estimates.</p>
<p>How does one estimate a probability in practice? This involves practical skills not very different from the practical skills required to estimate with accuracy the length of a golf shot, the number of carpenters you will need to build a house, or the time it will take you to walk to a friend’s house; we will consider elsewhere some ways to improve your practical skills in estimating probabilities. For now, let us simply categorize and consider in the next section various ways of estimating an ordinary garden variety of probability, which is called an “unconditional” probability.</p>
</section>
<section id="sec-probability-ways" class="level2" data-number="3.5">
<h2 data-number="3.5" class="anchored" data-anchor-id="sec-probability-ways"><span class="header-section-number">3.5</span> The various ways of estimating probabilities</h2>
<p>Consider the probability of drawing an even-numbered spade from a deck of poker cards (consider the queen as even and the jack and king as odd). Here are several general methods of estimation, where we define each method in terms of the operations we use to make the estimate:</p>
<ol type="1">
<li><p><strong>Experience.</strong></p>
<p>The first possible source for an estimate of the probability of drawing an even-numbered spade is the purely empirical method of <em>experience</em>. If you have watched card games casually from time to time, you might simply guess at the proportion of times you have seen even-numbered spades appear — say, “about 1 in 15” or “about 1 in 9” (which is almost correct) or something like that. (If you watch long enough you might come to estimate something like 6 in 52.)</p>
<p>General information and experience are also the source for estimating the probability that the sales of a particular brand of smart watch this December will be between 200 and 250, based on sales the last two Decembers; that your team will win the football game tomorrow; that war will break out next year; or that a United States astronaut will reach Mars before a Chinese astronaut. You simply put together all your relevant prior experience and knowledge, and then make an educated guess.</p>
<p>Observation of repeated events can help you estimate the probability that a machine will turn out a defective part or that a child can memorize four nonsense syllables correctly in one attempt. You watch repeated trials of similar events and record the results.</p>
<p>Data on the mortality rates for people of various ages in a particular country in a given decade are the basis for estimating the probabilities of death, which are then used by the actuaries of an insurance company to set life insurance rates. This is <em>systematized experience</em> — called a <em>frequency series</em>.</p>
<p>No frequency series can speak for itself in a perfectly objective manner. Many judgments inevitably enter into compiling every frequency series — deciding which frequency series to use for an estimate, choosing which part of the frequency series to use, and so on. For example, should the insurance company use only its records from last year, which will be too few to provide as much data as is preferable, or should it also use death records from years further back, when conditions were slightly different, together with data from other sources? (Of course, no two deaths — indeed, no events of any kind — are <em>exactly</em> the same. But under many circumstances they are <em>practically</em> the same, and science is only interested in such “practical” considerations.)</p>
<p>Given that we have to use judgment in probability estimates, the reader may prefer to talk about “degrees of belief” instead of probabilities. That’s fine, just as long as it is understood that we operate with degrees of belief in exactly the same way as we operate with probabilities; the two terms are working synonyms.</p>
<p>There is no <em>logical</em> difference between the sort of probability that the life insurance company estimates on the basis of its “frequency series” of past death rates, and the manager’s estimates of the sales of smart watches in December, based on sales in that month in the past two years.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p>
<p>The concept of a probability based on a frequency series can be rendered almost useless when all the observations are repetitions of a single magnitude — for example, the case of all successes and zero failures of space-shuttle launches prior to the Challenger shuttle tragedy in the 1980s; in those data alone there was almost no basis to estimate the probability of a shuttle failure. (Probabilists have made some rather peculiar attempts over the centuries to estimate probabilities from the length of a zero-defect time series — such as the fact that the sun has never failed to rise (foggy days aside!) — based on the undeniable fact that the longer such a series is, the smaller the probability of a failure; see e.g., <span class="citation" data-cites="whitworth1897dcc">(<a href="references.html#ref-whitworth1897dcc" role="doc-biblioref">Whitworth 1897, xix–xli</a>)</span>. However, one surely has more information on which to act when one has a long series of observations of the same magnitude rather than a short series).</p></li>
<li><p><strong>Simulated experience.</strong></p>
<p>A second possible source of probability estimates is empirical scientific investigation with repeated trials of the phenomenon. This is an empirical method even when the empirical trials are simulations. In the case of the even-numbered spades, the empirical scientific procedure is to shuffle the cards, deal one card, record whether or not the card is an even-number spade, replace the card, and repeat the steps a good many times. The proportions of times you observe an even-numbered spade come up is a probability estimate based on a frequency series.</p>
<p>You might reasonably ask why we do not just <em>count</em> the number of even-numbered spades in the deck of fifty-two cards — using the <em>sample space analysis</em> you see below. No reason at all. But that procedure would not work if you wanted to estimate the probability of a baseball batter getting a hit or a lighter producing flame.</p>
<p>Some varieties of poker are so complex that experiment is the only feasible way to estimate the probabilities a player needs to know.</p>
<p>The resampling approach to statistics produces estimates of most probabilities with this sort of experimental “Monte Carlo” method. More about this later.</p></li>
<li><p><strong>Sample space analysis and first principles.</strong></p>
<p>A third source of probability estimates is <em>counting the possibilities</em> — the quintessential theoretical method. For example, by examination of an ordinary die one can determine that there are six different numbers that can come up. One can then determine that the probability of getting (say) either a “1” <em>or</em> a “2,” on a single throw, is 2/6 = 1/3, because two among the six possibilities are “1” or “2.” One can similarly determine that there are two possibilities of getting a “1” <em>plus</em> a “6” out of thirty-six possibilities when rolling two dice, yielding a probability estimate of 2/36 = 1/18.</p>
<p>Estimating probabilities by counting the possibilities has two requirements: 1) that the possibilities all be known (and therefore limited), and few enough to be studied easily; and 2) that the probability of each particular possibility be known, for example, that the probabilities of all sides of the dice coming up are equal, that is, equal to 1/6.</p></li>
<li><p><strong>Mathematical shortcuts to sample-space analysis.</strong></p>
<p>A fourth source of probability estimates is <em>mathematical calculations</em>. (We will introduce some probability calculation rules in <a href="probability_theory_1b.html" class="quarto-xref"><span>Chapter 9</span></a>.) If one knows by other means that the probability of a spade is 1/4 and the probability of an even-numbered card is 6/13, one can use probability calculation rules to calculate that the probability of turning up an even-numbered spade is 6/52 (that is, 1/4 x 6/13). (This is the <em>multiplication rule</em> introduced in <a href="probability_theory_1a.html#sec-multiplication-rule" class="quarto-xref"><span>Section 8.12</span></a>). If one knows that the probability of a spade is 1/4 and the probability of a heart is 1/4, one can then calculate that the probability of getting a heart <em>or</em> a spade is 1/2 (that is 1/4 + 1/4). (We are using the <em>addition rule</em> from <a href="probability_theory_1a.html#sec-addition-rule" class="quarto-xref"><span>Section 8.7</span></a>.) The point here is not the particular calculation procedures, which we will touch on later, but rather that one can often calculate the desired probability on the basis of already-known probabilities.</p>
<p>It is possible to estimate probabilities with mathematical calculation only if one knows <em>by other means</em> the probabilities of some related events. For example, there is no possible way of mathematically calculating that a child will memorize four nonsense syllables correctly in one attempt; empirical knowledge is necessary.</p></li>
<li><p><strong>Kitchen-sink methods.</strong></p>
<p>In addition to the above four categories of estimation procedures, the statistical imagination may produce estimates in still other ways such as a) the salesman’s seat-of-the-pants estimate of what the competition’s price will be next quarter, based on who-knows-what gossip, long-time acquaintance with the competitors, and so on, and b) the probability risk assessments (PRAs) that are made for the chances of failures of nuclear power plants based, not on long experience or even on laboratory experiment, but rather on theorizing of various kinds — using pieces of prior experience wherever possible, of course. Any of these methods may be a combination of theoretical and empirical methods.</p></li>
</ol>
<p>As an example of an organization struggling with kitchen-sink methods, consider the estimation of the probability of failure for the tragic flight of the Challenger shuttle, as described by the famous physicist and Nobel laureate Richard Feynman. This is a very real case that includes just about every sort of complication that enters into estimating probabilities.</p>
<blockquote class="blockquote">
<p>…Mr. Ullian told us that 5 out of 127 rockets that he had looked at had failed — a rate of about 4 percent. He took that 4 percent and divided it by 4, because he assumed a manned flight would be safer than an unmanned one. He came out with about a 1 percent chance of failure, and that was enough to warrant the destruct charges.</p>
<p>But NASA [the space agency in charge] told Mr. Ullian that the probability of failure was more like 1 in <span class="math inline">\(10^5\)</span>.</p>
<p>I tried to make sense out of that number. “Did you say 1 in <span class="math inline">\(10^5\)</span>?”</p>
<p>“That’s right; 1 in 100,000.”</p>
<p>“That means you could fly the shuttle <em>every day</em> for an average of <em>300 years</em> between accidents — every day, one flight, for 300 years — which is obviously crazy!”</p>
<p>“Yes, I know,” said Mr. Ullian. “I moved my number up to 1 in 1000 to answer all of NASA’s claims — that they were much more careful with manned flights, that the typical rocket isn’t a valid comparison, etcetera.”</p>
<p>But then a new problem came up: the Jupiter probe, <em>Galileo</em>, was going to use a power supply that runs on heat generated by radioactivity. If the shuttle carrying <em>Galileo</em> failed, radioactivity could be spread over a large area. So the argument continued: NASA kept saying 1 in 100,000 and Mr. Ullian kept saying 1 in 1000, at best.</p>
<p>Mr. Ullian also told us about the problems he had in trying to talk to the man in charge, Mr. Kingsbury: he could get appointments with underlings, but he never could get through to Kingsbury and find out how NASA got its figure of 1 in 100,000 <span class="citation" data-cites="feynman1988what">(<a href="references.html#ref-feynman1988what" role="doc-biblioref">Feynman and Leighton 1988, 179–80</a>)</span>.</p>
</blockquote>
<p>Feynman tried to ascertain more about the origins of the figure of 1 in 100,000 that entered into NASA’s calculations. He performed an experiment with the engineers:</p>
<blockquote class="blockquote">
<p>…“Here’s a piece of paper each. Please write on your paper the answer to this question: what do you think is the probability that a flight would be uncompleted due to a failure in this engine?”</p>
<p>They write down their answers and hand in their papers. One guy wrote “99-44/100% pure” (copying the Ivory soap slogan), meaning about 1 in 200. Another guy wrote something very technical and highly quantitative in the standard statistical way, carefully defining everything, that I had to translate — which also meant about 1 in 200. The third guy wrote, simply, “1 in 300.”</p>
<p>Mr. Lovingood’s paper, however, said:</p>
<p>“Cannot quantify. Reliability is judged from:</p>
<ul>
<li>past experience</li>
<li>quality control in manufacturing</li>
<li>engineering judgment”</li>
</ul>
<p>“Well,” I said, “I’ve got four answers, and one of them weaseled.” I turned to Mr. Lovingood: “I think you weaseled.”</p>
<p>“I don’t think I weaseled.”</p>
<p>“You didn’t tell me <em>what</em> your confidence was, sir; you told me <em>how</em> you determined it. What I want to know is: after you determined it, what <em>was</em> it?”</p>
<p>He says, “100 percent” — the engineers’ jaws drop, my jaw drops; I look at him, everybody looks at him — “uh, uh, minus epsilon!”</p>
<p>So I say, “Well, yes; that’s fine. Now, the only problem is, WHAT IS EPSILON?”</p>
<p>He says, “<span class="math inline">\(10^{-5}\)</span>.” It was the same number that Mr. Ullian had told us about: 1 in 100,000.</p>
<p>I showed Mr. Lovingood the other answers and said, “You’ll be interested to know that there <em>is</em> a difference between engineers and management here — a factor of more than 300.”</p>
<p>He says, “Sir, I’ll be glad to send you the document that contains this estimate, so you can understand it.”</p>
<p>Later, Mr. Lovingood sent me that report. It said things like “The probability of mission success is necessarily very close to 1.0” — does that mean it <em>is</em> close to 1.0, or it <em>ought to be</em> close to 1.0? — and “Historically, this high degree of mission success has given rise to a difference in philosophy between unmanned and manned space flight programs; i.e., numerical probability versus engineering judgment.” As far as I can tell, “engineering judgment” means they’re just going to make up numbers! The probability of an engine-blade failure was given as a universal constant, as if all the blades were exactly the same, under the same conditions. The whole paper was quantifying everything. Just about every nut and bolt was in there: “The chance that a HPHTP pipe will burst is <span class="math inline">\(10^{-7}\)</span>.” You can’t estimate things like that; a probability of 1 in 10,000,000 is almost impossible to estimate. It was clear that the numbers for each part of the engine were chosen so that when you add everything together you get 1 in 100,000. <span class="citation" data-cites="feynman1988what">(<a href="references.html#ref-feynman1988what" role="doc-biblioref">Feynman and Leighton 1988, 182–83</a>)</span>.</p>
</blockquote>
<p>We see in the Challenger shuttle case very mixed kinds of inputs to actual estimates of probabilities. They include frequency series of past flights of other rockets, judgments about the relevance of experience with that different sort of rocket, adjustments for special temperature conditions (cold), and much much more. There also were complex computational processes in arriving at the probabilities that were made the basis for the launch decision. And most impressive of all, of course, are the extraordinary differences in estimates made by various persons (or perhaps we should talk of various statuses and roles) which make a mockery of the notion of objective estimation in this case.</p>
<p>Working with different sorts of estimation methods in different sorts of situations is not new; practical statisticians do so all the time. We argue that we should make no apology for doing so.</p>
<p>The concept of probability varies from one field of endeavor to another; it is different in the law, in science, and in business. The concept is most straightforward in decision-making situations such as business and gambling; there it is crystal-clear that one’s interest is entirely in making accurate predictions so as to advance the interests of oneself and one’s group. The concept is most difficult in social science, where there is considerable doubt about the aims and values of an investigation. In sum, one should not think of what a probability “is” but rather how best to estimate it. In practice, neither in actual decision-making situations nor in scientific work — nor in classes — do people experience difficulties estimating probabilities because of philosophical confusions. Only philosophers and mathematicians worry — and even they really do not <em>need</em> to worry — about the “meaning” of probability<a href="#fn2" class="footnote-ref" id="fnref2" role="doc-noteref"><sup>2</sup></a>.</p>
</section>
<section id="the-relationship-of-probability-to-other-magnitudes" class="level2" data-number="3.6">
<h2 data-number="3.6" class="anchored" data-anchor-id="the-relationship-of-probability-to-other-magnitudes"><span class="header-section-number">3.6</span> The relationship of probability to other magnitudes</h2>
<p>An important argument in favor of approaching the concept of probability as an <em>estimate</em> is that an estimate of a probability often (though not always) is the opposite side of the coin from an estimate of a physical quantity such as time or space.</p>
<p>For example, uncertainty about the probability that one will finish a task within 9 minutes is another way of labeling the uncertainty that the time required to finish the task will be less than 9 minutes. Hence, if estimation is appropriate for time in this case, it should be equally appropriate for probability. The same is true for the probability that the quantity of smart watches sold will be between 200 and 250 units.</p>
<p>Hence the concept of probability, and its estimation in any particular case, should be no more puzzling than is the “dual” concept of time or distance or quantities of smart watches. That is, lack of certainty about the probability that an event will occur is not different in nature from lack of certainty about the amount of time or distance in the event. There is no essential difference between whether a part 2 inches in length will be the next to emerge from the machine, or what the length of the next part will be, or the length of the part that just emerged (if it has not yet been measured).</p>
<p>The information available for the measurement of (say) the length of a car or the location of a star is exactly the same information that is available with respect to the concept of probability in those situations. That is, one may have ten disparate observations of a car’s length which then constitute a probability distribution, and the same for the altitude of a star in the heavens.</p>
<p>In a book of puzzles about probability <span class="citation" data-cites="mosteller1987fifty">(<a href="references.html#ref-mosteller1987fifty" role="doc-biblioref">Mosteller 1987</a>, problem 42)</span>, this problem appears: “If a stick is broken in two at random, what is the average length of the smaller piece?” This particular puzzle does not even mention probability explicitly, and no one would feel the need to write a scholarly treatise on the meaning of the word “length” here, any more than one would do so if the question were about an astronomer’s average observation of the angle of a star at a given time or place, or the average height of boards cut by a carpenter, or the average size of a basketball team. Nor would one write a treatise about the “meaning” of “time” if a similar puzzle involved the average time between two bird calls. Yet a rephrasing of the problem reveals its tie to the concept of probability, to wit: What is the probability that the smaller piece will be (say) more than half the length of the larger piece? Or, what is the probability distribution of the sizes of the shorter piece?</p>
<p>The duality of the concepts of probability and physical entities also emerges in Whitworth’s discussion <span class="citation" data-cites="whitworth1897dcc">(<a href="references.html#ref-whitworth1897dcc" role="doc-biblioref">1897</a>)</span> of fair betting odds:</p>
<blockquote class="blockquote">
<p>…What sum ought you fairly give or take now, while the event is undetermined, in exchange for the assurance that you shall receive a stated sum (say $1,000) if the favourable event occur? The chance of receiving $1,000 is worth something. It is not as good as the certainty of receiving $1,000, and therefore it is worth less than $1,000. But the prospect or expectation or chance, however slight, is a commodity which may be bought and sold. It must have its price somewhere between zero and $1,000. (p. xix.)</p>
</blockquote>
<blockquote class="blockquote">
<p>…And the ratio of the expectation to the full sum to be received is what is called the chance of the favourable event. For instance, if we say that the chance is 1/5, it is equivalent to saying that $200 is the fair price of the contingent $1,000. (p. xx.)…</p>
</blockquote>
<blockquote class="blockquote">
<p>The fair price can sometimes be calculated mathematically from <em>a priori</em> considerations: sometimes it can be deduced from statistics, that is, from the recorded results of observation and experiment. Sometimes it can only be estimated generally, the estimate being founded on a limited knowledge or experience. If your expectation depends on the drawing of a ticket in a raffle, the fair price can be calculated from abstract considerations: if it depend upon your outliving another person, the fair price can be inferred from recorded statistics: if it depend upon a benefactor not revoking his will, the fair price depends upon the character of your benefactor, his habit of changing his mind, and other circumstances upon the knowledge of which you base your estimate. But if in any of these cases you determine that $300 is the sum which you ought fairly to accept for your prospect, this is equivalent to saying that your chance, whether calculated or estimated, is 3/10... (p. xx.)</p>
</blockquote>
<p>It is indubitable that along with frequency data, a wide variety of other information will affect the odds at which a reasonable person will bet. If the two concepts of probability stand on a similar footing here, why should they not be on a similar footing in <em>all</em> discussion of probability? I can think of no reason that they should not be so treated.</p>
<p>Scholars write about the “discovery” of the concept of probability in one century or another. But is it not likely that even in pre-history, when a fisherperson was asked how long the big fish was, s/he sometimes extended her/his arms and said, “About this long, but I’m not exactly sure,” and when a scout was asked how many of the enemy there were, s/he answered, “I don’t know for sure...probably about fifty.” The uncertainty implicit in these statements is the functional equivalent of probability statements. There simply is no need to make such heavy work of the probability concept as the philosophers and mathematicians and historians have done.</p>
</section>
<section id="what-is-chance" class="level2" data-number="3.7">
<h2 data-number="3.7" class="anchored" data-anchor-id="what-is-chance"><span class="header-section-number">3.7</span> What is “chance”?</h2>
<p>The study of probability focuses on events with randomness — that is, events about which there is uncertainty whether or not they will occur. And the uncertainty refers to your knowledge rather than to the event itself. For example, consider this physical illustration with a remote control. The remote control has a <em>front end</em> that should point at the TV that it controls, and a <em>back end</em> that will usually be pointing at me, the user of the remote control. Call the front — the <em>TV end</em>, and the back — <em>the sofa end</em> of the remote control.</p>
<p>I spin the remote control like a baton twirler. If I hold it at the sofa end and attempt to flip it so that it turns only half a revolution, I can be almost sure that I will correctly get the TV end and not the sofa end. And if I attempt to flip it a full revolution, again I can almost surely get the sofa end successfully. It is not a random event whether I catch the sofa end or the TV end (here ignoring those throws when I catch neither end) when doing only half a revolution or one revolution. The result is quite predictable in both these simple maneuvers so far.</p>
<p>When I say the result is “predictable,” I mean that you would not bet with me about whether this time I’ll get the TV or the sofa end. So we say that the outcome of my flip aiming at half a revolution is not “random.”</p>
<p>When I twirl the remote control so little, I <em>control</em> (almost completely) whether the sofa end or the TV end comes down to my hand; this is the same as saying that the outcome does not occur by chance.</p>
<p>The terms “random” and “chance” implicitly mean that you believe that I cannot control or cannot know in advance what will happen.</p>
<p>Whether this twirl will be the rare time I miss, however, <em>should</em> be considered chance. Though you would not bet at even odds on my catching the sofa end versus the TV end if there is to be only a half or one full revolution, you might bet — at (say) odds of 50 to 1 — that I will make a mistake and get it wrong, or drop it. So the very same flip can be seen as random or determined depending on what aspect of it we are looking at.</p>
<p>Of course you would not bet <em>against</em> me about my <em>not</em> making a mistake, because the bet might <em>cause</em> me to make a mistake purposely. This “moral hazard” is a problem that emerges when a person buys life insurance and may commit suicide, or when a boxer may lose a fight purposely. The people who stake money on those events say that such an outcome is “fixed” (a very appropriate word) and not random.</p>
<p>Now I attempt more difficult maneuvers with the remote control. I can do <span class="math inline">\(1\frac{1}{2}\)</span> flips pretty well, and two full revolutions with some success — maybe even <span class="math inline">\(2\frac{1}{2}\)</span> flips on a good day. But when I get much beyond that, I cannot determine very well whether I’ll get the sofa or the TV end. The outcome gradually becomes less and less predictable — that is, more and more random.</p>
<p>If I flip the remote control so that it revolves three or more times, I can hardly control the process at all, and hence I cannot predict well whether I’ll get the sofa end or the TV end. With 5 revolutions I have absolutely no control over the outcome; I cannot predict the outcome better than 50-50. At that point, getting the sofa end or the TV end has become a completely random event for our purposes, just like flipping a coin high in the air. So at that point we say that “chance” controls the outcome, though that word is just a synonym for my lack of ability to control and predict the outcome. “Chance” can be thought to stand for the myriad small factors that influence the outcome.</p>
<p>We see the same gradual increase in randomness with increasing numbers of shuffles of cards. After one shuffle, a skilled magician can know where every card is, and after two shuffles there is still much order that s/he can work with. But after (say) five shuffles, the magician no longer has any power to predict and control, and the outcome of any draw can then be thought of as random chance.</p>
<p>At what point do we say that the outcome is “random” or “pure chance” as to whether my hand will grasp the TV end, the sofa end, or at some other spot? <em>There is no sharp boundary to this transition.</em> Rather, the transition is gradual; this is the crucial idea, and one that I have not seen stated before.</p>
<p>Whether or not we refer to the outcome as random depends upon the twirler’s skill, which influences how predictable the event is. A baton twirler or juggler might be able to do ten flips with a non-random outcome; if the twirler is an expert and the outcome is highly predictable, we say it is not random but rather is determined.</p>
<p>Again, this shows that the randomness is not a property of the physical event, but rather of a person’s knowledge and skill.</p>
</section>
<section id="sec-what-is-chance" class="level2" data-number="3.8">
<h2 data-number="3.8" class="anchored" data-anchor-id="sec-what-is-chance"><span class="header-section-number">3.8</span> What Do We Mean by “Random”?</h2>
<p>We have defined “chance” and “random” as the absence of predictive power and/or explanation and/or control. Here we should not confuse the concepts of determinacy-indeterminacy and predictable-unpredictable. What matters for <em>decision purposes</em> is whether you can predict. Whether the process is “really” determinate is largely a matter of definition and labeling, an unnecessary philosophical controversy for our purposes (and perhaps for any other purpose).<a href="#fn3" class="footnote-ref" id="fnref3" role="doc-noteref"><sup>3</sup></a></p>
<p>The remote control in the previous demonstration <em>becomes</em> unpredictable — that is, random — even though it still is subject to similar physical processes as when it is predictable. I do not deny <em>in principle</em> that these processes can be “understood,” or that one could produce a machine that would — like a baton twirler — make the course of the remote control predictable for many turns. But in <em>practice</em> we cannot make the predictions — and it is the <em>practical reality</em>, rather than the principle, that matters here.</p>
<p>When I flip the remote control half a turn or one turn, I control (almost completely) whether it comes down at the sofa end or the TV end, so we do not say that the outcome is chance. Much the same can be said about what happens to the predictability of drawing a given card as one increases the number of times one shuffles a deck of cards.</p>
<p>Consider, too, a set of fake dice that I roll. Before you know they are fake, you assume that the probabilities of various outcomes is a matter of chance. But after you know that the dice are loaded, you no longer assume that the outcome is chance. This illustrates how the probabilities you work with are influenced by your knowledge of the facts of the situation.</p>
<p>Admittedly, this way of thinking about probability takes some getting used to. Events may appear to be random, but in fact, we can predict them — and <em>vice versa</em>. For example, suppose a magician does a simple trick with dice such as this one:</p>
<blockquote class="blockquote">
<p>The magician turns her back while a spectator throws three dice on the table. He is instructed to add the faces. He then picks up any <em>one</em> die, adding the number on the <em>bottom</em> to the previous total. This same die is rolled again. The number it now shows is also added to the total. The magician turns around. She calls attention to the fact that she has no way of knowing which of the three dice was used for the second roll. She picks up the dice, shakes them in her hand a moment, then correctly announces the final sum.</p>
</blockquote>
<p><strong>Method:</strong> When the spectator rolls the dice, they get three numbers, one from each of the three dice. Call these numbers <span class="math inline">\(a\)</span>, <span class="math inline">\(b\)</span> and <span class="math inline">\(c\)</span>. Then he chooses one die — it doesn’t matter which, but let’s say he chooses the third die, with value <span class="math inline">\(c\)</span>. He adds the bottom of the third die to the total. Here’s the trick: the total of opposite faces on a standard die always adds up to 7; 1 is opposite 6, 2 is opposite 5, and 3 is opposite 4. So the total is now <span class="math inline">\(a + b + 7\)</span>. Then the spectator rolls the third die again, to get a new number <span class="math inline">\(d\)</span>. The total is now <span class="math inline">\(a + b + 7 + d\)</span>. When the magician turns round she can see what <span class="math inline">\(a\)</span> and <span class="math inline">\(b\)</span> and <span class="math inline">\(d\)</span> are, so to get the right final total, she just needs to add 7 <span class="citation" data-cites="gardner1985mathematical">(<a href="references.html#ref-gardner1985mathematical" role="doc-biblioref">Gardner 1985, p259</a>)</span>. Ben Sparks does a <a href="https://www.youtube.com/watch?v=kviwvLpnZSY">nice demonstration of the trick on Numberphile YouTube</a>.</p>
<p>The point here is that, until you know the trick, you (the magician) cannot predict the final sum, so the magician and the spectator consider the result as random. If you do know the trick, you can predict the result, and it is not random. Whether something is “random” or not, depends on what you know.</p>
<p>Consider the distributions of heights of various groups of living things (including people). When we consider all living things taken together, the shape of the overall distribution — many individuals at the tiny end where the viruses are found, and very few individuals at the tall end where the giraffes are — is determined mostly by the distribution of species that have different mean heights. Hence we can explain the shape of that distribution, and we do not say that is determined by “chance.” But with a homogeneous cohort of a single species — say, all 25-year-old human females in the U.S. — our best description of the shape of the distribution is “chance.” With situations in between, the shape is partly due to identifiable factors — e.g. age — and partly due to “chance.”</p>
<p>Or consider the case of a basketball shooter: What causes her or him to make (or not make) a basket this shot, after a string of successes? Much must be ascribed to chance variation. But what causes a given shooter to be very good or very poor relative to other players? For that explanation we can point to such factors as the amount of practice or natural talent.</p>
<p>Again, all this has nothing to do with whether the mechanism is “really” chance, unlike the arguments that have been raging in physics for a century. That is the point of the remote control demonstration. Our knowledge and our power to predict the outcome gradually transits from non-chance (that is, “determined”) to chance (“not determined”) in a gradual way even though the same sort of physical mechanism produces each throw of the remote control.</p>
<p>Earlier I mentioned that when we say that chance controls the outcome of the remote control flip after (say) five revolutions, we mean that there are many small forces that affect the outcome. The effect of each force is not known, and each is independent of the other. None of these forces is large enough for me (as the remote control twirler) to deal with, or else I would deal with it and be able to improve my control and my ability to predict the outcome. This concept of many small influences — “small” meaning in practice those influences whose effects cannot be identified and allowed for — which affect the outcome and whose effects are not knowable and which are independent of each other is important in statistical inference. For example, as we will see later, when we add many unpredictable deviations together, and plot the distribution of the result, we end up with the famous and very common bell-shaped <em>normal distribution</em> — this striking result comes about because of a mathematical phenomenon called the Central Limit Theorem.<a href="#fn4" class="footnote-ref" id="fnref4" role="doc-noteref"><sup>4</sup></a></p>
<!---
We may show this at work, later in the book.
It does come up in the confidence_2 chapter, but without further explanation.
-->
</section>
<section id="randomness-from-the-computer" class="level2" data-number="3.9">
<h2 data-number="3.9" class="anchored" data-anchor-id="randomness-from-the-computer"><span class="header-section-number">3.9</span> Randomness from the computer</h2>
<p>We now have the idea of <em>random</em> variation as being variation we cannot predict. For example, when we flip the remote control through many rotations, we can no longer easily predict which end will land in our hand. We can call the result of any particular flip — <em>random</em> — because we cannot predict whether the result will be TV end or sofa end.</p>
<p>We still know some things about the result — it will be one of two options — TV or sofa (unless we drop it). But we cannot predict which. We say the result of each flip is <em>random</em> if we cannot do anything to improve our prediction of 50% for TV (or sofa) end on the next flip.</p>
<p>We are not saying the result <em>is</em> random in any deep, non-deterministic sense — we are only saying we can treat the result as random, because we cannot predict it.</p>
<p>Now consider getting <em>random</em> numbers from the computer, where the numbers can either be 0 or 1. This is rather like tossing a fair coin, where the results are 0 and 1 rather than “heads” and “tails”.</p>
<p>When we ask the computer for a random choice between 0 and 1, we accept it is random-enough, or random-like, if we can’t do anything to predict which of 0 or 1 we will get on any one trial. We can’t do better than guessing that the next value will be — say — 0 — and whichever number we guess, we will only ever have a 50% chance of being correct. We are not saying the computer is giving <em>truly</em> random numbers in the sense that they are fundamentally not deterministic, it is only giving us numbers we cannot <em>distinguish</em> from truly random numbers, because we cannot <em>in practice</em> do anything to predict them. The technical term for random numbers from the computer is therefore <em>pseudo-random</em> — meaning, like random numbers, in the sense they are effectively unpredictable. Effectively unpredictable means there is no <em>practical</em> way for you, or even a very powerful computer, to do anything to improve your prediction of the next number in the series.</p>
</section>
<section id="the-philosophers-dispute-about-the-concept-of-probability" class="level2" data-number="3.10">
<h2 data-number="3.10" class="anchored" data-anchor-id="the-philosophers-dispute-about-the-concept-of-probability"><span class="header-section-number">3.10</span> The philosophers’ dispute about the concept of probability</h2>
<p>Those who call themselves “objectivists” or “frequentists” and those who call themselves “personalists” or “Bayesians” have been arguing for hundreds or even thousands of years about the “nature” of probability. The objectivists insist (correctly) that any estimation not based on a series of observations is subject to potential bias, from which they conclude (incorrectly) that we should never think of probability that way. They are worried about the perversion of science, the substitution of arbitrary assessments for value-free data-gathering. The personalists argue (correctly) that in many situations it is not possible to obtain sufficient data to avoid considerable judgment. Indeed, if a probability is about the future, some judgment is <em>always</em> required — about which observations will be relevant, and so on. They sometimes conclude (incorrectly) that the objectivists’ worries are unimportant.</p>
<p>As is so often the case, the various sides in the argument have different sorts of situations in mind. As we have seen, the arguments disappear if one thinks <em>operationally</em> with respect to the <em>purpose of the work</em>, rather than in terms of <em>properties</em>, as mentioned earlier.</p>
<p>Here is an example of the difficulty of focusing on the supposed properties of the mechanism or situation: The mathematical theorist asserts that the probability of a die falling with the “5” side up is 1/6, on the basis of the physics of equally-weighted sides. But if one rolls a particular die a million times, and it turns up “5” less than 1/6 of the time, one surely would use the observed proportion as the practical estimate. The probabilities of various outcomes with cheap dice may depend upon the number of pips drilled out on a side. In 20,000 throws of a red die and 20,000 throws of a white die, the proportions of 3’s and 4’s were, respectively, .159 and .146, .145 and .142 — all far below the expected proportions of .167. That is, 3’s and 4’s occurred about 11 percent less often than if the dice had been perfectly formed, a difference that could make a big difference in a gambling game <span class="citation" data-cites="bulmer1979principles">(<a href="references.html#ref-bulmer1979principles" role="doc-biblioref">Bulmer 1979, 18</a>)</span>.</p>
<p>It is reasonable to think of both the <em>engineering</em> method (the theoretical approach) and the <em>empirical</em> method (experimentation and data collection) as two alternative ways to estimate a probability. The two methods use different processes and different proxies for the probability you wish to estimate. One must adduce additional knowledge to decide which method to use in any given situation. It is sensible to use the empirical method when data are available. (But use both together whenever possible.)</p>
<p>In view of the inevitably subjective nature of probability estimates, you may prefer to talk about “degrees of belief” instead of probabilities. That’s fine, just as long as it is understood that we operate with degrees of belief in exactly the same way as we operate with probabilities. The two terms are working synonyms.</p>
<p>Most important: One cannot sensibly talk about probabilities in the abstract, without reference to some set of facts. The topic then loses its meaning, and invites confusion and argument. This also is a reason why a general formalization of the probability concept does not make sense.</p>
</section>
<section id="the-relationship-of-probability-to-the-concept-of-resampling" class="level2" data-number="3.11">
<h2 data-number="3.11" class="anchored" data-anchor-id="the-relationship-of-probability-to-the-concept-of-resampling"><span class="header-section-number">3.11</span> The relationship of probability to the concept of resampling</h2>
<p>There is no all-agreed definition of the concept of the resampling method in statistics. Unlike some other writers, I prefer to apply the term to problems in <em>both</em> pure probability and statistics. This set of examples may illustrate:</p>
<ol type="1">
<li><p>Consider asking about the number of hits one would expect from a 0.250 (25 percent) batter in a 400 at-bat season. One would call this a problem in “probability.” The sampling distribution of the batter’s results can be calculated by formula or produced by Monte Carlo simulation.</p></li>
<li><p>Now consider examining the number of hits in a given batter’s season, and asking how likely that number (or fewer) is to occur by chance if the batter’s long-run batting average is 0.250. One would call this a problem in “statistics.” But just as in example (1) above, the answer can be calculated by formula or produced by Monte Carlo simulation. And the calculation or simulation is exactly the same as used in (1).</p>
<p>Here the term “resampling” might be applied to the simulation with considerable agreement among people familiar with the term, but perhaps not by all such persons.</p></li>
<li><p>Next consider an observed distribution of distances that a batter’s hits travel in a season with 100 hits, with an observed mean of 150 feet per hit. One might ask how likely it is that a sample of 10 hits drawn with replacement from the observed distribution of hit lengths (with a mean of 150 feet) would have a mean greater than 160 feet, and one could easily produce an answer with repeated Monte Carlo samples. Traditionally this would be called a problem in probability.</p></li>
<li><p>Next consider that a batter gets 10 hits with a mean of 160 feet, and one wishes to estimate the probability that the sample would be produced by a distribution as specified in (3). This is a problem in statistics, and by 1996, it is common statistical practice to treat it with a resampling method. The actual simulation would, however, be identical to the work described in (3).</p></li>
</ol>
<p>Because the work in (4) and (2) differ only in question (4) involving measured data and question (2) involving counted data, there seems no reason to discriminate between the two cases with respect to the term “resampling.” With respect to the pairs of cases (1) and (2), and (3) and (4), there is no difference in the actual work performed, though there is a difference in the way the question is framed. I would therefore urge that the label “resampling” be applied to (1) and (3) as well as to (2) and (4), to bring out the important fact that the procedure is the same as in resampling questions in statistics.</p>
<p>One could easily produce examples like (1) and (2) for cases that are similar except that the drawing is without replacement.<a href="#fn5" class="footnote-ref" id="fnref5" role="doc-noteref"><sup>5</sup></a> And one could adduce the example of prices in different state liquor control systems (see <a href="probability_theory_3.html#sec-public-liquor" class="quarto-xref"><span>Section 12.15</span></a>) which is similar to cases (3) and (4) except that sampling without replacement seems appropriate. Again, the analogs to cases (2) and (4) would generally be called “resampling.”</p>
<p>The concept of resampling is defined in a more precise way in <a href="probability_theory_1a.html#sec-what-is-resampling" class="quarto-xref"><span>Section 8.9</span></a>.</p>
</section>
<section id="conclusion" class="level2" data-number="3.12">
<h2 data-number="3.12" class="anchored" data-anchor-id="conclusion"><span class="header-section-number">3.12</span> Conclusion</h2>
<p>We define “chance” as the absence of predictive power and/or explanation and/or control.</p>
<p>When the remote control rotates more than three or four turns I cannot control the outcome — whether TV or sofa end — with any accuracy. That is to say, I cannot predict much better than 50-50 with more than four rotations. So we then say that the outcome is determined by “chance.”</p>
<p>As to those persons who wish to inquire into what the situation “really” is: I hope they agree that we do not need to do so to proceed with our work. I hope all will agree that the outcome of flipping the remote control gradually <em>becomes</em> unpredictable (random) though still subject to similar physical processes as when predictable. I do not deny <em>in principle</em> that these processes can be “understood,” certainly one can develop a machine (or a baton twirler) that will make the outcome predictable for many turns. But this has nothing to do with whether the mechanism is “really” something one wants to say is influenced by “chance.” This is the point of the demonstration with the sofa and TV ends of the remote control. The outcome traverses from non-chance (determined) to chance (not determined) in a smooth way even though the physical mechanism that produces the revolutions remains much the same over the traverse.</p>
<div id="refs" class="references csl-bib-body hanging-indent" data-entry-spacing="0" role="list" style="display: none">
<div id="ref-barnett1982comparative" class="csl-entry" role="listitem">
Barnett, Vic. 1982. <em>Comparative Statistical Inference</em>. 2nd ed. Wiley Series in Probability and Mathematical Statistics. Chichester: John Wiley & Sons. <a href="https://archive.org/details/comparativestati0000barn">https://archive.org/details/comparativestati0000barn</a>.
</div>
<div id="ref-bulmer1979principles" class="csl-entry" role="listitem">
Bulmer, M. G. 1979. <em>Principles of Statistics</em>. New York, NY: Dover Publications, inc. <a href="https://archive.org/details/principlesofstat0000bulm">https://archive.org/details/principlesofstat0000bulm</a>.
</div>
<div id="ref-feller1968introduction" class="csl-entry" role="listitem">
Feller, William. 1968. <em>An Introduction to Probability Theory and Its Applications: Volume i</em>. 3rd ed. Vol. 1. New York: John Wiley & Sons. <a href="https://www.google.co.uk/books/edition/An_Introduction_to_Probability_Theory_an/jbkdAQAAMAAJ">https://www.google.co.uk/books/edition/An_Introduction_to_Probability_Theory_an/jbkdAQAAMAAJ</a>.
</div>
<div id="ref-feynman1988what" class="csl-entry" role="listitem">
Feynman, Richard P., and Ralph Leighton. 1988. <em>What Do <u>You</u> Care What Other People Think? Further Adventures of a Curious Character.</em> New York, NY: W. W. Norton; Company, Inc. <a href="https://archive.org/details/whatdoyoucarewha0000feyn_x5w7">https://archive.org/details/whatdoyoucarewha0000feyn_x5w7</a>.
</div>
<div id="ref-fisher1935design" class="csl-entry" role="listitem">
Fisher, Ronald Aylmer. 1935. <em>The Design of Experiments</em>. 1st ed. Edinburgh: <span>Oliver and Boyd Ltd</span>. <a href="https://archive.org/details/in.ernet.dli.2015.502684">https://archive.org/details/in.ernet.dli.2015.502684</a>.
</div>
<div id="ref-fisher1960design" class="csl-entry" role="listitem">
———. 1960. <em>The Design of Experiments</em>. 7th ed. Edinburgh: <span>Oliver and Boyd Ltd</span>. <a href="https://archive.org/details/designofexperime0000rona_q7u5">https://archive.org/details/designofexperime0000rona_q7u5</a>.
</div>
<div id="ref-gardner1985mathematical" class="csl-entry" role="listitem">
Gardner, Martin. 1985. <em>Mathematical Magic Show</em>. Penguin Books Ltd, Harmondsworth.
</div>
<div id="ref-mosteller1987fifty" class="csl-entry" role="listitem">
Mosteller, Frederick. 1987. <em>Fifty Challenging Problems in Probability with Solutions</em>. Courier Corporation.
</div>
<div id="ref-raiffa1968decision" class="csl-entry" role="listitem">
Raiffa, Howard. 1968. <span>“Decision Analysis: Introductory Lectures on Choices Under Uncertainty.”</span> <a href="https://archive.org/details/decisionanalysis0000raif">https://archive.org/details/decisionanalysis0000raif</a>.
</div>
<div id="ref-ruark1930atoms" class="csl-entry" role="listitem">
Ruark, Arthur Edward, and Harold Clayton Urey. 1930. <em>Atoms, Molecules and Quanta</em>. New York, NY: <span>McGraw-Hill</span> book company, inc. <a href="https://archive.org/details/atomsmoleculesqu00ruar">https://archive.org/details/atomsmoleculesqu00ruar</a>.
</div>
<div id="ref-russell1945history" class="csl-entry" role="listitem">
Russell, Bertrand. 1945. <em>A History of <span>W</span>estern Philosophy</em>. New York: Simon; Schuster.
</div>
<div id="ref-whitworth1897dcc" class="csl-entry" role="listitem">
Whitworth, William Allen. 1897. <em><span>DCC</span> Exercises in Choice and Chance</em>. Cambridge, UK: Deighton Bell; Co. <a href="https://archive.org/details/dccexerciseschoi00whit">https://archive.org/details/dccexerciseschoi00whit</a>.
</div>
</div>
</section>
<section id="footnotes" class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr>
<ol>
<li id="fn1"><p>At one time, some writers believed there was a difference between “objectively sharply defined” and “objectively vague” probabilities. Raiffa <span class="citation" data-cites="raiffa1968decision">(<a href="references.html#ref-raiffa1968decision" role="doc-biblioref">1968</a>)</span> gives a clear example of why this is not so:</p>
<p>Suppose you are confronted with two options. In option 1, you must toss coin 1 (which is fair and true), guess heads or tails, and win $1.00 if you match and lose $1.00 if you fail to match. In option 2, you have a 50-50 chance of getting coin 2, which has two heads, or of getting coin 3, which has two tails. Not knowing whether you are tossing coin 2 or 3, you must call, toss, and get the payoffs as in option 1. With option 1, the probability of the toss coming out heads is .5; with option 2, the same probability is either 0 or 1, and since the chance of each in turn is .5, the probability of heads is ultimately .5 once again. Nothing is to be gained by saying that one .5 is sharply defined and that the other is fuzzy. Of course, <em>if</em>, and this is a big “if,” you could experiment with the coin you will toss before you are obliged to declare, then the two options are manifestly asymmetrical. Barring this privilege, the two options are equivalent <span class="citation" data-cites="raiffa1968decision">(<a href="references.html#ref-raiffa1968decision" role="doc-biblioref">Raiffa 1968, 108</a>)</span>.<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
<li id="fn2"><p>This does not mean that I think that people should confine their learning to what they need in their daily work. Having a deeper philosophical knowledge than you ordinarily need can help you deal with extraordinary problems when they arise.<a href="#fnref2" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
<li id="fn3"><p>The idea that our aim is to advance our work in improving our knowledge and our decisions, rather than to answer “ultimate” questions about what is “really” true is in the same spirit as some writing about quantum theory. In 1930 <a href="https://en.wikipedia.org/wiki/Arthur_Edward_Ruark">Ruark</a> and <a href="https://en.wikipedia.org/wiki/Harold_Urey">Urey</a> wrote: “The reader who feels disappointed that the information sought in solving a dynamical problem on the quantum theory is [only] statistical … should console himself with the thought that we seldom need any information other than that which is given by the quantum theory.” <span class="citation" data-cites="ruark1930atoms">(<a href="references.html#ref-ruark1930atoms" role="doc-biblioref">1930, 622</a>)</span>.<a href="#fnref3" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
<li id="fn4"><p>The Central Limit Theorem is an interesting mathematical result that proves something you can show for yourself by simulation — that if we take means of many values drawn from <em>any</em> shape of distribution, and then look at the distribution of the resulting means, it will be close to the <em>normal</em> (bell-curve) distribution. If you are interested in a technical (mathematical) explanation of this result, see <a href="https://en.wikipedia.org/wiki/Central_limit_theorem">the Wikipedia page on the Central Limit Theorem</a>.<a href="#fnref4" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
<li id="fn5"><p>One example of drawing <em>without replacement</em> is the sampling version of Ronald Fisher’s permutation test — see <span class="citation" data-cites="fisher1935design fisher1960design">(<a href="references.html#ref-fisher1935design" role="doc-biblioref">Fisher 1935</a>; <a href="references.html#ref-fisher1960design" role="doc-biblioref">Fisher 1960, chap. II</a>, section 5)</span>.<a href="#fnref5" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
</ol>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Apply the light/dark CSS class pair to <body> according to the
// "data-mode" attribute of the given Bootstrap stylesheet <link> element.
const toggleBodyColorMode = (bsSheetEl) => {
  const mode = bsSheetEl.getAttribute("data-mode");
  const bodyEl = window.document.querySelector("body");
  const dark = mode === "dark";
  bodyEl.classList.add(dark ? "quarto-dark" : "quarto-light");
  bodyEl.classList.remove(dark ? "quarto-light" : "quarto-dark");
}
// Sync the body color mode with the primary Quarto Bootstrap stylesheet,
// when one is present in the document.
const toggleBodyColorPrimary = () => {
  const sheet = window.document.querySelector("link#quarto-bootstrap");
  if (sheet) {
    toggleBodyColorMode(sheet);
  }
}
// Apply the mode once, immediately on DOM ready.
toggleBodyColorPrimary();
// Decorate anchorable headings with AnchorJS links, using no visible icon.
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// True when the element carries any class beginning with 'code-annotation-'.
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((cls) => cls.startsWith('code-annotation-'));
// Flash feedback after a successful clipboard copy: temporarily swap the
// button title to "Copied!" (showing a Bootstrap tooltip when Bootstrap is
// loaded), then restore the original state one second later.
const onCopySuccess = function(e) {
  const button = e.trigger;
  // Don't keep focus on the button after the click.
  button.blur();
  // Flash the "checked" state.
  button.classList.add('code-copy-button-checked');
  const previousTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");
  let tooltip;
  if (window.bootstrap) {
    button.setAttribute("data-bs-toggle", "tooltip");
    button.setAttribute("data-bs-placement", "left");
    button.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(button, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tooltip.show();
  }
  setTimeout(() => {
    if (tooltip) {
      tooltip.hide();
      button.removeAttribute("data-bs-title");
      button.removeAttribute("data-bs-toggle");
      button.removeAttribute("data-bs-placement");
    }
    button.setAttribute("title", previousTitle);
    button.classList.remove('code-copy-button-checked');
  }, 1000);
  // Clear the text selection created by the copy.
  e.clearSelection();
}
// Resolve the text a code-copy button should place on the clipboard: the
// text of the preceding code element, with code-annotation markers removed
// so they are not copied along with the code.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // Snapshot the children before removing any: `children` is a *live*
  // HTMLCollection, and removing an element while iterating it skips the
  // element that follows each removal (so consecutive annotation nodes
  // would not all be stripped).
  for (const childEl of [...codeEl.children]) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire up clipboard.js for all code-copy buttons outside of modals.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy,
});
clipboard.on('success', onCopySuccess);
// Buttons inside the embedded-source modal need a separate instance created
// with a `container` option so clipboard.js can operate within the modal.
// TODO: Check when it could be a function (https://github.com/zenorocha/clipboard.js/issues/860)
const sourceCodeModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (sourceCodeModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: sourceCodeModal,
  });
  clipboardModal.on('success', onCopySuccess);
}
// A link counts as "internal" when it targets this site's host, a
// localhost URL, or is a mailto: link.
const localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
const mailtoRegex = /^mailto:/;
const filterRegex = new RegExp('/' + window.location.host + '/');
const isInternal = (href) =>
  filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
// Inspect non-navigation links and adorn them if external
const links = window.document.querySelectorAll('a[href]' + ':not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  // undo the damage that might have been done by quarto-nav.js in the case of
  // links that we want to consider external
  if (link.dataset.originalHref !== undefined) {
    link.href = link.dataset.originalHref;
  }
}
// Attach a Quarto-themed tippy.js hover popup to an element. The content
// and trigger callbacks are optional; only the ones supplied are wired in.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // Render the popup inside the reference element's parent so it
    // inherits the surrounding styling.
    appendTo: (refEl) => refEl.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  if (contentFn) {
    config.content = contentFn;
  }
  if (onTriggerFn) {
    config.onTrigger = onTriggerFn;
  }
  if (onUntriggerFn) {
    config.onUntrigger = onUntriggerFn;
  }
  window.tippy(el, config);
}
// Show each footnote's content in a hover popup over its reference link.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // Prefer the explicit footnote data attribute; fall back to the href.
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    return note ? note.innerHTML : "";
  });
}
// All cross-reference links in the page; each gets a hover preview of its
// target, rendered via processXRef.
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the hover-preview HTML for a cross-reference target element.
// Sections (id starting with 'sec-', or a null id) are trimmed to their
// heading plus the first non-empty paragraph; other targets are shown in
// full, minus any anchor link.
const processXRef = (id, note) => {
  // Strip column container classes so the preview lays out in the popup.
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  stripColumnClz(note)
  // Typeset math in the preview when Quarto's MathJax hook is available.
  const typeset = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  }
  if (id === null || id.startsWith('sec-')) {
    // Special case sections, only their first couple elements
    const container = document.createElement("div");
    if (note.children && note.children.length > 2) {
      container.appendChild(note.children[0].cloneNode(true));
      for (let i = 1; i < note.children.length; i++) {
        const child = note.children[i];
        // Skip empty paragraphs; keep the first substantive element.
        if (child.tagName === "P" && child.innerText === "") {
          continue;
        }
        container.appendChild(child.cloneNode(true));
        break;
      }
      typeset(container);
      return container.innerHTML
    }
    typeset(note);
    return note.innerHTML;
  }
  // Remove any anchor links if they are present
  const anchorLink = note.querySelector('a.anchorjs-link');
  if (anchorLink) {
    anchorLink.remove();
  }
  typeset(note);
  // TODO in 1.5, we should make sure this works without a callout special case
  return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
}
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}