bayes-rules-notes/R/ch2.html

1872 lines
89 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.189">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Emanuel Rodriguez">
<title>Chapter 2 Notes</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="ch2_files/libs/clipboard/clipboard.min.js"></script>
<script src="ch2_files/libs/quarto-html/quarto.js"></script>
<script src="ch2_files/libs/quarto-html/popper.min.js"></script>
<script src="ch2_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="ch2_files/libs/quarto-html/anchor.min.js"></script>
<link href="ch2_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="ch2_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="ch2_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="ch2_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="ch2_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script>window.backupDefine = window.define; window.define = undefined;</script><script src="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.js"></script>
<script>document.addEventListener("DOMContentLoaded", function () {
  // Once the DOM is parsed, typeset every <span> carrying the "math" class
  // in place with KaTeX. Spans that also carry "display" are rendered in
  // display mode, all others inline.

  // If the KaTeX script did not load, leave the raw TeX visible instead of
  // failing with a ReferenceError.
  if (typeof katex === "undefined") {
    return;
  }

  var mathElements = document.getElementsByClassName("math");
  // One macros object shared by every render call; KaTeX documents this
  // option as a plain object, not an array.
  var macros = {};

  for (var i = 0; i < mathElements.length; i++) {
    var mathElement = mathElements[i];
    if (mathElement.tagName != "SPAN") {
      continue;
    }
    // textContent is "" for an empty span, whereas firstChild would be null
    // there and reading `.data` from it would throw and stop the loop before
    // the remaining formulas were rendered.
    var texSource = mathElement.textContent;
    katex.render(texSource, mathElement, {
      displayMode: mathElement.classList.contains("display"),
      throwOnError: false,
      macros: macros,
      fleqn: false
    });
  }
});
</script>
<script>window.define = window.backupDefine; window.backupDefine = undefined;</script><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.15.1/dist/katex.min.css">
<link rel="stylesheet" href="styles.css">
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Chapter 2 Notes</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Emanuel Rodriguez </p>
</div>
</div>
</div>
</header>
<p><em>Note: these notes are a work in progress</em></p>
<p>In this chapter we step through an example of “fake” vs “real” news to build a framework to determine the probability of real vs fake of a new news article titled “The President has a secret!”</p>
<p>We then go on to build a probability model known as the Binomial model using the Bayesian framework.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># libraries</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(bayesrules)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyr)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(gt)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tibble)</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(fake_news)</span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>fake_news <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">as_tibble</span>(fake_news)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What is the proportion of news articles that were labeled fake vs real?</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span> <span class="fu">head</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 30
title text url authors type title…¹ text_…² title…³ text_…⁴ title…⁵
&lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;fct&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt;
1 Clinton's E… "0 S… http… &lt;NA&gt; fake 17 219 110 1444 0
2 Donald Trum… "\n\… http… &lt;NA&gt; real 18 509 95 3016 0
3 Michelle Ob… "Mic… http… Sierra… fake 16 494 96 2881 1
4 Trump hits … "“Cr… http… Jack S… real 11 268 60 1674 0
5 Australia V… "Whe… http… Blair … fake 9 479 54 2813 0
6 Its “Trump… "Lik… http… View A… real 12 220 66 1351 1
# … with 20 more variables: text_caps &lt;int&gt;, title_caps_percent &lt;dbl&gt;,
# text_caps_percent &lt;dbl&gt;, title_excl &lt;int&gt;, text_excl &lt;int&gt;,
# title_excl_percent &lt;dbl&gt;, text_excl_percent &lt;dbl&gt;, title_has_excl &lt;lgl&gt;,
# anger &lt;dbl&gt;, anticipation &lt;dbl&gt;, disgust &lt;dbl&gt;, fear &lt;dbl&gt;, joy &lt;dbl&gt;,
# sadness &lt;dbl&gt;, surprise &lt;dbl&gt;, trust &lt;dbl&gt;, negative &lt;dbl&gt;, positive &lt;dbl&gt;,
# text_syllables &lt;int&gt;, text_syllables_per_word &lt;dbl&gt;, and abbreviated
# variable names ¹title_words, ²text_words, ³title_char, ⁴text_char, …</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(),</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(fake_news)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 60 0.4
2 real 90 0.6</code></pre>
</div>
</div>
<p>If we let <span class="math inline">B</span> be the event that a news article is “fake” news, and <span class="math inline">B^c</span> be the event that a news article is “real”, we can write the following:</p>
<p><span class="math display">P(B) = .4</span> <span class="math display">P(B^c) = .6</span></p>
<p>This is the first “clue” or set of data that we have to build into our framework. Namely, the majority of articles are “real”, therefore we could simply predict that the new article is “real”. This updated sense of reality now becomes our priors.</p>
<p>Next we gather additional data and update our priors based on it. The new observation we make is the use of exclamation marks “!”. We note that the use of “!” is more frequent in news articles labeled as “fake”. We will want to incorporate this into our framework to decide whether the new incoming article should be labelled as real or fake.</p>
<section id="likelihood" class="level3">
<h3 class="anchored" data-anchor-id="likelihood">Likelihood</h3>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Probability and Likelihood
</div>
</div>
<div class="callout-body-container callout-body">
<p>When the event <span class="math inline">B</span> is known, then we can evaluate the uncertainty of events <span class="math inline">A</span> and <span class="math inline">A^c</span> given <span class="math inline">B</span>:</p>
<p><span class="math display">P(A|B) \text{ vs } P(A^c|B)</span></p>
<p>If, on the other hand, we know event <span class="math inline">A</span>, then we can evaluate the relative compatibility of data <span class="math inline">A</span> with <span class="math inline">B</span> and <span class="math inline">B^c</span> using likelihood functions:</p>
<p><span class="math display">L(B|A) \text{ vs } L(B^c|A)</span> <span class="math display">=P(A|B) \text{ vs } P(A|B^c)</span></p>
</div>
</div>
<p>So in our case, we don’t know whether this new incoming article is real or not, but we do know that the title has an exclamation mark. This means we can use likelihood functions to evaluate how compatible an “!” in the title is with the article being fake versus being real. We can formulate this as:</p>
<p><span class="math display">L(B|A) \text{ vs } L(B^c|A)</span></p>
<p>And perform the computation in R as follows:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># if fake, what are the proprotions of ! vs no-!</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="ot">&lt;-</span> fake_news <span class="sc">|&gt;</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type, title_has_excl) <span class="sc">|&gt;</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">|&gt;</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> title_has_excl,</span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="at">prop_within_type =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="sc">|&gt;</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> <span class="st">"type"</span>, <span class="at">values_from =</span> prop_within_type) <span class="sc">|&gt;</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>() <span class="sc">|&gt;</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_label</span>(</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> <span class="st">"Contains Exclamtion"</span>,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="at">fake =</span> <span class="st">"Fake"</span>, </span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="at">real =</span> <span class="st">"Real"</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">fmt_number</span>(<span class="at">columns=</span><span class="fu">c</span>(<span class="st">"fake"</span>, <span class="st">"real"</span>), <span class="at">decimals =</span> <span class="dv">3</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="ytbjahccmx" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#ytbjahccmx .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#ytbjahccmx .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#ytbjahccmx .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#ytbjahccmx .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#ytbjahccmx .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ytbjahccmx .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#ytbjahccmx .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#ytbjahccmx .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#ytbjahccmx .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#ytbjahccmx .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#ytbjahccmx .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#ytbjahccmx .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#ytbjahccmx .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#ytbjahccmx .gt_from_md > :first-child {
margin-top: 0;
}
#ytbjahccmx .gt_from_md > :last-child {
margin-bottom: 0;
}
#ytbjahccmx .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#ytbjahccmx .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#ytbjahccmx .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#ytbjahccmx .gt_row_group_first td {
border-top-width: 2px;
}
#ytbjahccmx .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#ytbjahccmx .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#ytbjahccmx .gt_first_summary_row.thick {
border-top-width: 2px;
}
#ytbjahccmx .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ytbjahccmx .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#ytbjahccmx .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#ytbjahccmx .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#ytbjahccmx .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ytbjahccmx .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#ytbjahccmx .gt_footnote {
margin: 0px;
font-size: 90%;
padding-left: 4px;
padding-right: 4px;
padding-left: 5px;
padding-right: 5px;
}
#ytbjahccmx .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#ytbjahccmx .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#ytbjahccmx .gt_left {
text-align: left;
}
#ytbjahccmx .gt_center {
text-align: center;
}
#ytbjahccmx .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#ytbjahccmx .gt_font_normal {
font-weight: normal;
}
#ytbjahccmx .gt_font_bold {
font-weight: bold;
}
#ytbjahccmx .gt_font_italic {
font-style: italic;
}
#ytbjahccmx .gt_super {
font-size: 65%;
}
#ytbjahccmx .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#ytbjahccmx .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#ytbjahccmx .gt_indent_1 {
text-indent: 5px;
}
#ytbjahccmx .gt_indent_2 {
text-indent: 10px;
}
#ytbjahccmx .gt_indent_3 {
text-indent: 15px;
}
#ytbjahccmx .gt_indent_4 {
text-indent: 20px;
}
#ytbjahccmx .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed;; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col">Contains Exclamtion</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Fake</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Real</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_center">FALSE</td>
<td class="gt_row gt_right">0.733</td>
<td class="gt_row gt_right">0.978</td></tr>
<tr><td class="gt_row gt_center">TRUE</td>
<td class="gt_row gt_right">0.267</td>
<td class="gt_row gt_right">0.022</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The table above also shows the likelihoods for the case when an article does not contain an exclamation point in the title. It’s really important to note that these are likelihoods, and it’s not the case that <span class="math inline">L(B|A) + L(B^c|A) = 1</span>; as a matter of fact, this sum evaluates to a number less than one. However, since we have that <span class="math inline">L(B|A) = .267</span> and <span class="math inline">L(B^c|A) = .022</span>, we have gained additional knowledge: the use of “!” in a title is more compatible with a fake news article than a real one.</p>
<p>Up to this point we can summarize our framework as follows</p>
<table class="table">
<thead>
<tr class="header">
<th>event</th>
<th><span class="math inline">B</span></th>
<th><span class="math inline">B^c</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>prior</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
<tr class="even">
<td>likelihood</td>
<td>.267</td>
<td>.022</td>
<td>.289</td>
</tr>
</tbody>
</table>
<p>Our next goal is to come up with normalizing factors in order to build our probability table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">B</span></th>
<th><span class="math inline">B^c</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">A</span></td>
<td>(1)</td>
<td>(2)</td>
<td></td>
</tr>
<tr class="even">
<td><span class="math inline">A^c</span></td>
<td>(3)</td>
<td>(4)</td>
<td></td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>A couple of things to note about our table: (1) + (3) = .4 and (2) + (4) = .6, and (1) + (2) + (3) + (4) = 1.</p>
<p>(1.) <span class="math inline">P(A \cap B) = P(A|B)P(B)</span> we know the likelihood of <span class="math inline">L(B|A) = P(A|B)</span> and we also know the prior so we insert these to get <span class="math display"> P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068</span></p>
<p>(3.) <span class="math inline">P(A^c \cap B) = P(A^c|B)P(B)</span> in this case we do know the prior <span class="math inline">P(B) = .4</span>, but we don’t directly know the value of <span class="math inline">P(A^c|B)</span>; however, we note that <span class="math inline">P(A|B) + P(A^c|B) = 1</span>, therefore we compute <span class="math inline">P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733</span> <span class="math display"> P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932</span></p>
<p>we now can confirm that <span class="math inline">.1068 + .2932 = .4</span></p>
<p>Moving on to (2), (4)</p>
<p>(2.) <span class="math inline">P(A \cap B^c) = P(A|B^c)P(B^c)</span>. In this case we know the likelihood <span class="math inline">L(B^c|A) = P(A|B^c)</span> and we know the prior <span class="math inline">P(B^c)</span>, therefore <span class="math display">P(A \cap B^c) = P(A|B^c)P(B^c) = .022 \times .6 = .0132</span></p>
<p>(4.) <span class="math inline">P(A^c \cap B^c) = P(A^c|B^c)P(B^c) = (1 - .022) \times .6 = .5868</span></p>
<p>and can confirm that <span class="math inline">.0132 + .5868 = .6</span></p>
<p>and we can fill the rest of the table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">B</span></th>
<th><span class="math inline">B^c</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">A</span></td>
<td>.1068</td>
<td>.0132</td>
<td>.12</td>
</tr>
<tr class="even">
<td><span class="math inline">A^c</span></td>
<td>.2932</td>
<td>.5868</td>
<td>.88</td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>An important concept we implemented above is the idea of <strong>total probability</strong>.</p>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
total probability
</div>
</div>
<div class="callout-body-container callout-body">
<p>The <strong>total probability</strong> of observing a real article is made up of the sum of its parts. Namely:</p>
<p><span class="math display">P(B^c) = P(A \cap B^c) + P(A^c \cap B^c)</span> <span class="math display">=P(A|B^c)P(B^c) + P(A^c|B^c)P(B^c)</span> <span class="math display">=.0132 + .5868 = .6</span></p>
</div>
</div>
<p>In the above calculations we also step through <strong>joint probabilities</strong></p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Joint and conditional probability
</div>
</div>
<div class="callout-body-container callout-body">
<p><span class="math display">P(A \cap B) = P(A|B)P(B)</span></p>
<p><span class="math inline">A</span> and <span class="math inline">B</span> are said to be independent events, if and only if</p>
<p><span class="math display">P(A \cap B) = P(A)P(B)</span></p>
<p>Rearranging the joint probability identity <span class="math inline">P(A \cap B) = P(A|B)P(B)</span>, we can also derive the definition of a conditional probability</p>
<p><span class="math display">P(A|B) = \frac{P(A \cap B)}{P(B)}</span></p>
</div>
</div>
<p>At this point we are able to answer the question, “What is the probability that the new article is fake?”. Given that the new article has an exclamation point, we can zoom into the top row of the table of probabilities. Within this row we have probabilities <span class="math inline">.1068 / .12 = .89</span> for fake and <span class="math inline">.0132 / .12 = .11</span> for real.</p>
<p>This is essentially Bayes Rule. We developed a posterior probability for an event <span class="math inline">B</span> given some observation <span class="math inline">A</span>. We did so by combining the likelihood of event <span class="math inline">B</span> given some new data <span class="math inline">A</span> and the prior probability of event <span class="math inline">B</span>. More formally we have the following definition:</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Bayes Rule
</div>
</div>
<div class="callout-body-container callout-body">
<p>The posterior probability of an event <span class="math inline">B</span> given an event <span class="math inline">A</span> is:</p>
<p><span class="math display"> P(B|A) = \frac{P(A \cap B)}{P(A)} = \frac{L(B|A)P(B)}{P(A)}</span></p>
<p>where <span class="math inline">L</span> is the likelihood function <span class="math inline">L(B|A) = P(A|B)</span> and <span class="math inline">P(A)</span> is the total probability of <span class="math inline">A</span>.</p>
<p>More generally,</p>
<p><span class="math display"> \text{posterior} = \frac{\text{likelihood} \cdot \text{prior}}{\text{normalizing constant}}</span></p>
</div>
</div>
</section>
<section id="simualation" class="level3">
<h3 class="anchored" data-anchor-id="simualation">Simulation</h3>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>articles <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"real"</span>, <span class="st">"fake"</span>))</span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>priors <span class="ot">&lt;-</span> <span class="fu">c</span>(.<span class="dv">6</span>, .<span class="dv">4</span>)</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> <span class="fu">sample_n</span>(articles, <span class="dv">10000</span>, <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">weight =</span> priors)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type)) <span class="sc">+</span> <span class="fu">geom_bar</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-6-1.png" class="img-fluid" width="672" alt="Bar chart of the number of simulated articles of each type (fake and real)"></p>
</div>
</div>
<p>and a summary table</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(), </span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(articles_sim)</span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>()<span class="sc">|&gt;</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="yyakhpsztb" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#yyakhpsztb .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#yyakhpsztb .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#yyakhpsztb .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#yyakhpsztb .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#yyakhpsztb .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#yyakhpsztb .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#yyakhpsztb .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#yyakhpsztb .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#yyakhpsztb .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#yyakhpsztb .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#yyakhpsztb .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#yyakhpsztb .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#yyakhpsztb .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#yyakhpsztb .gt_from_md > :first-child {
margin-top: 0;
}
#yyakhpsztb .gt_from_md > :last-child {
margin-bottom: 0;
}
#yyakhpsztb .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#yyakhpsztb .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#yyakhpsztb .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#yyakhpsztb .gt_row_group_first td {
border-top-width: 2px;
}
#yyakhpsztb .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#yyakhpsztb .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#yyakhpsztb .gt_first_summary_row.thick {
border-top-width: 2px;
}
#yyakhpsztb .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#yyakhpsztb .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#yyakhpsztb .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#yyakhpsztb .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#yyakhpsztb .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#yyakhpsztb .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#yyakhpsztb .gt_footnote {
margin: 0px;
font-size: 90%;
padding-left: 4px;
padding-right: 4px;
padding-left: 5px;
padding-right: 5px;
}
#yyakhpsztb .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#yyakhpsztb .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#yyakhpsztb .gt_left {
text-align: left;
}
#yyakhpsztb .gt_center {
text-align: center;
}
#yyakhpsztb .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#yyakhpsztb .gt_font_normal {
font-weight: normal;
}
#yyakhpsztb .gt_font_bold {
font-weight: bold;
}
#yyakhpsztb .gt_font_italic {
font-style: italic;
}
#yyakhpsztb .gt_super {
font-size: 65%;
}
#yyakhpsztb .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#yyakhpsztb .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#yyakhpsztb .gt_indent_1 {
text-indent: 5px;
}
#yyakhpsztb .gt_indent_2 {
text-indent: 10px;
}
#yyakhpsztb .gt_indent_3 {
text-indent: 15px;
}
#yyakhpsztb .gt_indent_4 {
text-indent: 20px;
}
#yyakhpsztb .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_left" rowspan="1" colspan="1" scope="col">type</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">total</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">prop</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_left">fake</td>
<td class="gt_row gt_right">4037</td>
<td class="gt_row gt_right">0.4037</td></tr>
<tr><td class="gt_row gt_left">real</td>
<td class="gt_row gt_right">5963</td>
<td class="gt_row gt_right">0.5963</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The simulation of 10,000 articles gives us very nearly the same priors we had from the data (about 40% fake and 60% real). We can now add the exclamation point usage to the simulated data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">model_data =</span> <span class="fu">case_when</span>(</span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"fake"</span> <span class="sc">~</span> .<span class="dv">267</span>, </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"real"</span> <span class="sc">~</span> .<span class="dv">022</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> ))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The plan here is to iterate through the 10,000 samples and use each row's <code>model_data</code> value (the probability that an article of that type uses an exclamation point) to assign either “yes” or “no” using the <code>sample</code> function.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>data <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"yes"</span>, <span class="st">"no"</span>)</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">id =</span> <span class="fu">row_number</span>()) <span class="sc">|&gt;</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(id) <span class="sc">|&gt;</span></span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">usage =</span> <span class="fu">sample</span>(data, <span class="dv">1</span>, <span class="at">prob =</span> <span class="fu">c</span>(model_data, <span class="dv">1</span> <span class="sc">-</span> model_data)))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(usage, type) <span class="sc">|&gt;</span></span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> type, <span class="at">values_from =</span> total)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
# Groups: usage [2]
usage fake real
&lt;chr&gt; &lt;int&gt; &lt;int&gt;
1 no 2919 5824
2 yes 1118 139</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type, <span class="at">fill =</span> usage)) <span class="sc">+</span> </span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>() <span class="sc">+</span> </span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_discrete</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"gray8"</span>, <span class="st">"dodgerblue4"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
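<p><img src="ch2_files/figure-html/unnamed-chunk-11-1.png" class="img-fluid" width="672" alt="Stacked bar chart of simulated article counts by type (fake and real), filled by exclamation point usage (yes or no)"></p>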
</div>
</div>
<p>So far we have computed both the priors and the likelihoods; we can now simply filter our data to reflect the incoming article (one that uses an exclamation point) and determine our posterior.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(usage <span class="sc">==</span> <span class="st">"yes"</span>) <span class="sc">|&gt;</span></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;chr&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 1118 0.889
2 real 139 0.111</code></pre>
</div>
</div>
</section>
<section id="binomial-model-and-the-chess-example" class="level2">
<h2 class="anchored" data-anchor-id="binomial-model-and-the-chess-example">Binomial Model and the chess example</h2>
<p>The example used here is a chess match between a human and the computer “Deep Blue”. The setup is that the two faced each other in 1996, and the human won. A rematch is scheduled for the next year, 1997. We would like to model the number of games out of 6 that the human will win in the rematch.</p>
<p>Let <span class="math inline">\pi</span> be the probability that the human wins any one game against the computer. To simplify things greatly, we assume that <span class="math inline">\pi</span> can only take on one of the values .2, .5, or .8. We also assume the following prior (we are told in the book that we will learn how to build these later on):</p>
<table class="table">
<thead>
<tr class="header">
<th><span class="math inline">\pi</span></th>
<th>.2</th>
<th>.5</th>
<th>.8</th>
<th>total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">f(\pi)</span></td>
<td>.10</td>
<td>.25</td>
<td>.65</td>
<td>1</td>
</tr>
</tbody>
</table>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p>It's important to note here that the values of <span class="math inline">\pi</span> <strong>do not</strong> need to add up to 1. <span class="math inline">\pi</span> represents the chance of winning any single game, so in general it can take on any value in <span class="math inline">[0, 1]</span>. On the other hand, <span class="math inline">f</span> is a function that maps each value of <span class="math inline">\pi</span> to a probability, and those probabilities must add up to 1; this is defined next.</p>
</div>
</div>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Discrete Probability Model
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let <span class="math inline">Y</span> be a discrete random variable. The probability model for <span class="math inline">Y</span> is described by a <strong>probability mass function</strong> (pmf) defined as: <span class="math display">f(y) = P(Y = y)</span></p>
<p>and has the following properties</p>
<ol type="1">
<li><span class="math inline">0 \leq f(y) \leq 1\;\; \forall y</span></li>
<li><span class="math inline">\sum_{\forall y}f(y) = 1</span></li>
</ol>
</div>
</div>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
In Emanuel's words
</div>
</div>
<div class="callout-body-container callout-body">
<p>What does this mean? Well, it's very straightforward: a pmf is a function that takes in some value <span class="math inline">y</span> and outputs the probability that the random variable <span class="math inline">Y</span> equals <span class="math inline">y</span>.</p>
</div>
</div>
<p>Next we would like to add the dependency of <span class="math inline">Y</span> on <span class="math inline">\pi</span>; we do so by introducing the conditional pmf.</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Conditional probability model of data <span class="math inline">Y</span>
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let <span class="math inline">Y</span> be a discrete random variable that depends on some parameter <span class="math inline">\pi</span>. We define the conditional probability model of <span class="math inline">Y</span> as the conditional pmf,</p>
<p><span class="math display">f(y|\pi) = P(Y = y | \pi)</span></p>
<p>and has the following properties,</p>
<ol type="1">
<li><span class="math inline">0 \leq f(y|\pi) \leq 1\;\; \forall y</span></li>
<li><span class="math inline">\sum_{\forall y}f(y|\pi) = 1</span></li>
</ol>
</div>
</div>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
In Emanuel's words
</div>
</div>
<div class="callout-body-container callout-body">
<p>This is essentially the same probability model we defined above, except that now we are conditioning the probabilities on some parameter <span class="math inline">\pi</span>.</p>
</div>
</div>
<p>in the example of the chess player we must make some assumptions:</p>
<ol type="1">
<li><p>The chance of winning any one game in the match stays constant. So if the human has a 65% chance of winning game 1, then that is also the chance of winning each of games 2-6.</p></li>
<li><p>Winning or losing a game does not affect the chances of winning or losing the next game, i.e. games are independent of one another.</p></li>
</ol>
<p>These two assumptions lead us to the <strong>Binomial Model</strong>.</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
The Binomial Model
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let the random variable <span class="math inline">Y</span> represent the number of successes in <span class="math inline">n</span> trials. Assume that each trial is independent, and the probability of success in a given trial is <span class="math inline">\pi</span>. Then the conditional dependence of <span class="math inline">Y</span> on <span class="math inline">\pi</span> can be modeled by the <strong>Binomial Model</strong> with parameters <span class="math inline">n</span> and <span class="math inline">\pi</span>. We can write this as,</p>
<p><span class="math display">Y|\pi \sim Bin(n, \pi)</span></p>
<p>the binomial model is specified by the pmf:</p>
<p><span class="math display">f(y|\pi) = {n \choose y} \pi^y(1 - \pi)^{n-y}</span></p>
</div>
</div>
<p>Knowing this, we can model <span class="math inline">Y</span>, the total number of games out of 6 that the human wins, as</p>
<p><span class="math display">Y|\pi \sim Bin(6, \pi)</span></p>
<p>and conditional pmf:</p>
<p><span class="math display">f(y|\pi) = {6 \choose y}\pi^y(1 - \pi)^{6 - y}\;\; \text{for } y \in \{0, 1, 2, 3, 4, 5, 6\}</span></p>
<p>With the pmf we can now determine the probability of the human winning <span class="math inline">y</span> games out of 6 for any given value of <span class="math inline">\pi</span>.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb18"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>chess_pmf <span class="ot">&lt;-</span> <span class="cf">function</span>(y, p, <span class="at">n =</span> <span class="dv">6</span>) {</span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">choose</span>(n, y) <span class="sc">*</span> (p <span class="sc">^</span> y) <span class="sc">*</span> (<span class="dv">1</span> <span class="sc">-</span> p)<span class="sc">^</span>(n <span class="sc">-</span> y)</span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a><span class="co"># what is the probability that the human wins 5 games given a pi value of .8 </span></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="fu">chess_pmf</span>(<span class="at">y =</span> <span class="dv">5</span>, <span class="at">p =</span> .<span class="dv">8</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.393216</code></pre>
</div>
</div>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
</div>
</div>
<div class="callout-body-container callout-body">
<p>The formula for the binomial is actually pretty intuitive. First you have the binomial coefficient <span class="math inline">{n \choose y}</span>, which counts the number of ways the player can win <span class="math inline">y</span> games out of the possible <span class="math inline">n</span>. This is multiplied by the probability of <span class="math inline">y</span> successes, <span class="math inline">p^y</span>, which can be re-written as <span class="math inline">p\times p\times \cdots \times p</span> (<span class="math inline">y</span> times), and then multiplied by the probability of <span class="math inline">n-y</span> failures, <span class="math inline">(1 - p)^{n - y}</span>.</p>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb20"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>pies <span class="ot">&lt;-</span> <span class="fu">seq</span>(<span class="dv">0</span>, <span class="dv">1</span>, <span class="at">by =</span> .<span class="dv">05</span>)</span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>py <span class="ot">&lt;-</span> <span class="fu">chess_pmf</span>(<span class="at">y =</span> <span class="dv">4</span>, <span class="at">p =</span> pies)</span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a>d <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">pies =</span> pies, <span class="at">py =</span> py)</span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(pies, py)) <span class="sc">+</span> <span class="fu">geom_col</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
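<p><img src="ch2_files/figure-html/unnamed-chunk-14-1.png" class="img-fluid" width="672" alt="Bar chart of the binomial probability of winning 4 of 6 games for values of pi from 0 to 1 in steps of 0.05"></p>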
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a>pies <span class="ot">&lt;-</span> <span class="fu">c</span>(.<span class="dv">2</span>, .<span class="dv">5</span>, .<span class="dv">8</span>)</span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a>ys <span class="ot">&lt;-</span> <span class="dv">0</span><span class="sc">:</span><span class="dv">6</span></span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>d <span class="ot">&lt;-</span> tidyr<span class="sc">::</span><span class="fu">expand_grid</span>(pies, ys)</span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a>fys <span class="ot">&lt;-</span> purrr<span class="sc">::</span><span class="fu">map2_dbl</span>(d<span class="sc">$</span>ys, d<span class="sc">$</span>pies, <span class="sc">~</span><span class="fu">chess_pmf</span>(.x, .y), <span class="at">n=</span><span class="dv">6</span>)</span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a>d<span class="sc">$</span>fys <span class="ot">&lt;-</span> fys</span>
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a>d<span class="sc">$</span>display_pi <span class="ot">&lt;-</span> <span class="fu">as.factor</span>(<span class="fu">paste</span>(<span class="st">"pi ="</span>, d<span class="sc">$</span>pies))</span>
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> ys, <span class="at">y =</span> fys)) <span class="sc">+</span> </span>
<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_col</span>() <span class="sc">+</span> </span>
<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_continuous</span>(<span class="at">breaks =</span> <span class="dv">0</span><span class="sc">:</span><span class="dv">6</span>) <span class="sc">+</span> </span>
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="fu">vars</span>(display_pi))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-15-1.png" alt="Bar charts of fys against ys from 0 to 6, one panel for each of pi = 0.2, 0.5 and 0.8" class="img-fluid" width="672"></p>
</div>
</div>
<p>The plot shows the three possible values for <span class="math inline">\pi</span> along with the value of the pmf for each possible number of matches the human can win in a game. The values of <span class="math inline">f(y|\pi)</span> are pretty intuitive: we would expect the random variable <span class="math inline">Y</span> to be lower when the value of <span class="math inline">\pi</span> is lower and higher when the value of <span class="math inline">\pi</span> is higher.</p>
<p>For the sake of the exercise, let's add more values of <span class="math inline">\pi</span> so that we can see this shift happen in more detail.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb22"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a>pies <span class="ot">&lt;-</span> <span class="fu">seq</span>(.<span class="dv">1</span>, .<span class="dv">9</span>, <span class="at">by =</span> .<span class="dv">1</span>)</span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>ys <span class="ot">&lt;-</span> <span class="dv">0</span><span class="sc">:</span><span class="dv">6</span></span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a>d <span class="ot">&lt;-</span> tidyr<span class="sc">::</span><span class="fu">expand_grid</span>(pies, ys)</span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a>fys <span class="ot">&lt;-</span> purrr<span class="sc">::</span><span class="fu">map2_dbl</span>(d<span class="sc">$</span>ys, d<span class="sc">$</span>pies, <span class="sc">~</span><span class="fu">chess_pmf</span>(.x, .y), <span class="at">n=</span><span class="dv">6</span>)</span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a>d<span class="sc">$</span>fys <span class="ot">&lt;-</span> fys</span>
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a>d<span class="sc">$</span>display_pi <span class="ot">&lt;-</span> <span class="fu">as.factor</span>(<span class="fu">paste</span>(<span class="st">"pi ="</span>, d<span class="sc">$</span>pies))</span>
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> ys, <span class="at">y =</span> fys)) <span class="sc">+</span> </span>
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_col</span>() <span class="sc">+</span> </span>
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_continuous</span>(<span class="at">breaks =</span> <span class="dv">0</span><span class="sc">:</span><span class="dv">6</span>) <span class="sc">+</span> </span>
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="fu">vars</span>(display_pi), <span class="at">nrow =</span> <span class="dv">3</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
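<p><img src="ch2_files/figure-html/unnamed-chunk-16-1.png" alt="Bar charts of fys against ys from 0 to 6, one panel for each pi from 0.1 to 0.9, arranged in three rows" class="img-fluid" width="672"></p>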
</div>
</div>
<p>As it turns out, we learn that the human ended up winning just one game in the 1997 rematch, <span class="math inline">Y = 1</span>. The next step in our analysis is to determine how compatible this new data is with each value of <span class="math inline">\pi</span>, that is, the likelihood.</p>
<p>This is very easy to do with all the work we have done so far:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(ys <span class="sc">==</span> <span class="dv">1</span>) <span class="sc">|&gt;</span></span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(pies, fys)) <span class="sc">+</span> </span>
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_col</span>() <span class="sc">+</span> </span>
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_x_continuous</span>(<span class="at">breaks =</span> <span class="fu">seq</span>(.<span class="dv">1</span>, .<span class="dv">9</span>, <span class="at">by =</span> .<span class="dv">1</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-17-1.png" alt="Bar chart of fys at ys = 1 against pies from 0.1 to 0.9" class="img-fluid" width="672"></p>
</div>
</div>
<p>It's very important to note the following:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># this will sum to a value greater than 1!!</span></span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(ys <span class="sc">==</span> <span class="dv">1</span>) <span class="sc">|&gt;</span></span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">pull</span>(fys) <span class="sc">|&gt;</span></span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">sum</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 1.37907</code></pre>
</div>
</div>
<div class="callout-important callout callout-style-default callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Important
</div>
</div>
<div class="callout-body-container callout-body">
<p>This has been mentioned before, but it's an important message to drive home. Note that the reason why the values sum to a value greater than 1 is that they are <strong>not</strong> probabilities of <span class="math inline">\pi</span>, they are likelihoods. We are determining how compatible each value of <span class="math inline">\pi</span> is with the data we have observed, <span class="math inline">Y = 1</span>.</p>
</div>
</div>
<p>We can formalize the likelihood function <span class="math inline">L</span> in our example as follows:</p>
<p><span class="math display">L(\pi|y=1) = f(y=1|\pi) = {6 \choose 1}\pi^1(1-\pi)^{6-1}</span> <span class="math display"> = 6\pi(1 - \pi)^5</span></p>
<p>We can test this out</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="dv">6</span> <span class="sc">*</span> .<span class="dv">2</span> <span class="sc">*</span> (.<span class="dv">8</span> <span class="sc">^</span> <span class="dv">5</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.393216</code></pre>
</div>
</div>
<p>which is the value we get at <span class="math inline">\pi = .2</span> in the bar plot.</p>
<div class="cell">
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-20-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The likelihood values for <span class="math inline">Y = 1</span> are here:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>d <span class="sc">|&gt;</span></span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(ys <span class="sc">==</span> <span class="dv">1</span>)<span class="sc">|&gt;</span></span>
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>display_pi) <span class="sc">|&gt;</span></span>
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a> knitr<span class="sc">::</span><span class="fu">kable</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<table class="table table-sm table-striped">
<thead>
<tr class="header">
<th style="text-align: right;">pies</th>
<th style="text-align: right;">ys</th>
<th style="text-align: right;">fys</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">0.1</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.354294</td>
</tr>
<tr class="even">
<td style="text-align: right;">0.2</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.393216</td>
</tr>
<tr class="odd">
<td style="text-align: right;">0.3</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.302526</td>
</tr>
<tr class="even">
<td style="text-align: right;">0.4</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.186624</td>
</tr>
<tr class="odd">
<td style="text-align: right;">0.5</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.093750</td>
</tr>
<tr class="even">
<td style="text-align: right;">0.6</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.036864</td>
</tr>
<tr class="odd">
<td style="text-align: right;">0.7</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.010206</td>
</tr>
<tr class="even">
<td style="text-align: right;">0.8</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.001536</td>
</tr>
<tr class="odd">
<td style="text-align: right;">0.9</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.000054</td>
</tr>
</tbody>
</table>
</div>
</div>
<p>The overall take-away from having observed the new data <span class="math inline">Y=1</span> is that it is most compatible with the <span class="math inline">\pi</span> value of .2. This means that it's safe to assume that the human is the weaker player, since we can think of the value <span class="math inline">\pi</span> as a measure of the relative weakness/superiority of the human compared to the computer, with 0 being the weakest and 1 being the strongest.</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Probability mass functions vs likelihood functions
</div>
</div>
<div class="callout-body-container callout-body">
<p>When <span class="math inline">\pi</span> is known, the conditional pmf <span class="math inline">f(\cdot | \pi)</span> allows us to compare the probabilities of the different values of <span class="math inline">Y</span> occurring with <span class="math inline">\pi</span>.</p>
<p>On the other hand when <span class="math inline">Y = y</span> is known the likelihood function <span class="math inline">L(\cdot|Y=y) = f(Y=y|\cdot)</span> allows us to compare relative likelihoods of observing data <span class="math inline">y</span> under different values of <span class="math inline">\pi</span></p>
</div>
</div>
<p>Now that we have the priors for <span class="math inline">\pi</span> and the likelihoods for <span class="math inline">Y=1</span>, all we need is the <strong>normalizing constant</strong> to make use of Bayes' Rule in order to update our priors with this new information and develop our posterior. Recall that the normalizing constant is just the total probability of observing <span class="math inline">Y = 1</span>. To get this we simply calculate the probability of observing <span class="math inline">Y = 1</span> for all values of <span class="math inline">\pi</span> and weight each one of these by the corresponding prior of each value <span class="math inline">\pi</span>.</p>
<p><span class="math display">f(y = 1) = f(Y=1|\pi = .2)f(\pi = .2)</span> <span class="math display">+ f(Y=1|\pi = .5)f(\pi = .5) + f(Y = 1|\pi = .8)f(\pi=.8)</span> <span class="math display">\approx .0637</span></p>
<section id="posterior" class="level3">
<h3 class="anchored" data-anchor-id="posterior">Posterior</h3>
<p>Our posterior distribution has pmf:</p>
<p><span class="math display">f(\pi|y = 1)</span></p>
<p>we can write this out as:</p>
<p><span class="math display">f(\pi | y = 1) = \frac{f(\pi)\times L(\pi| y = 1)}{f(y = 1)}</span></p>
<p>for <span class="math inline">\pi \in \{0.2, 0.5, 0.8\}</span></p>
<p>using just our simplified set of <span class="math inline">\pi</span> values we have the following posterior:</p>
<table class="table">
<thead>
<tr class="header">
<th><span class="math inline">\pi</span></th>
<th>0.2</th>
<th>0.5</th>
<th>0.8</th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">f(\pi)</span></td>
<td>0.10</td>
<td>0.25</td>
<td>0.65</td>
<td>1</td>
</tr>
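<tr class="even">
<td><span class="math inline">f(\pi | y = 1)</span></td>
<td>0.617</td>
<td>0.368</td>
<td>0.016</td>
<td>1</td>
</tr>
</tbody>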
</table>
</section>
<section id="chess-posterior-simulation" class="level3">
<h3 class="anchored" data-anchor-id="chess-posterior-simulation">Chess Posterior Simulation</h3>
<p>Set up the scenario: we have possible values of <span class="math inline">\pi</span> and the corresponding prior probability for each one.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>chess <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">pi =</span> <span class="fu">c</span>(.<span class="dv">2</span>, .<span class="dv">5</span>, .<span class="dv">8</span>))</span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>prior <span class="ot">&lt;-</span> <span class="fu">c</span>(.<span class="dv">1</span>, .<span class="dv">25</span>, .<span class="dv">65</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Next we use a sample function to draw 10,000 values of <span class="math inline">\pi</span> from our data frame, weighted by the prior; we will use each of these to simulate a 6 match game.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a>chess_sim <span class="ot">&lt;-</span> <span class="fu">sample_n</span>(chess, <span class="at">size =</span> <span class="dv">10000</span>, <span class="at">weight =</span> prior, </span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a><span class="at">replace =</span> <span class="cn">TRUE</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Simulate 10,000 games</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a>chess_sim <span class="ot">&lt;-</span> chess_sim <span class="sc">|&gt;</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">y =</span> <span class="fu">rbinom</span>(<span class="dv">10000</span>, <span class="at">size =</span> <span class="dv">6</span>, <span class="at">prob =</span> pi))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>Let's check how close this simulation is to our known conditional pmfs.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a>chess_sim <span class="sc">|&gt;</span></span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> y)) <span class="sc">+</span> </span>
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">stat_count</span>(<span class="fu">aes</span>(<span class="at">y =</span> ..prop..)) <span class="sc">+</span> </span>
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">facet_wrap</span>(<span class="sc">~</span>pi)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
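<p><img src="ch2_files/figure-html/unnamed-chunk-25-1.png" alt="Bar charts of the proportion of simulated outcomes at each value of y, one panel per value of pi" class="img-fluid" width="672"></p>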
</div>
</div>
<p>Let's now focus on the events where <span class="math inline">Y = 1</span> and tally up the results to see how well they approximate the values we formally computed as our posterior.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>chess_sim <span class="sc">|&gt;</span></span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(y <span class="sc">==</span> <span class="dv">1</span>) <span class="sc">|&gt;</span></span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(pi) <span class="sc">|&gt;</span></span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">tally</span>() <span class="sc">|&gt;</span></span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> n <span class="sc">/</span> <span class="fu">sum</span>(n)</span>
<span id="cb33-7"><a href="#cb33-7" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 3 × 3
pi n prop
&lt;dbl&gt; &lt;int&gt; &lt;dbl&gt;
1 0.2 344 0.604
2 0.5 218 0.382
3 0.8 8 0.0140</code></pre>
</div>
</div>
</section>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror a stylesheet's colour mode onto <body>: a data-mode of "dark" puts
// the quarto-dark class on the body, any other value puts quarto-light there.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const body = window.document.querySelector("body");
  // pick the class to switch on and the one to switch off in a single step
  const [enabled, disabled] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  body.classList.add(enabled);
  body.classList.remove(disabled);
};
// Apply the colour mode of the link#quarto-bootstrap stylesheet to <body>;
// does nothing when the page has no such <link>.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// run once, now that the DOM content has loaded
toggleBodyColorPrimary();
const icon = "";
// Add AnchorJS links to every element carrying the `anchored` class, using
// the `icon` string as the link glyph and placement 'right'.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// Wire every .code-copy-button to ClipboardJS; the text copied is taken from
// the element immediately before the button.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  target: function(trigger) {
    return trigger.previousElementSibling;
  }
});
// After a successful copy, briefly flash the button as "checked" and show
// "Copied!" as its title, then restore the resting state.
clipboard.on('success', function(e) {
  // button target
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // Only start a flash when one is not already running. Re-clicking during
  // the flash would otherwise capture "Copied!" as the title to restore and
  // leave the button permanently titled "Copied!".
  if (!button.classList.contains('code-copy-button-checked')) {
    // flash "checked"
    button.classList.add('code-copy-button-checked');
    const restingTitle = button.getAttribute("title");
    button.setAttribute("title", "Copied!");
    setTimeout(function() {
      button.setAttribute("title", restingTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
  }
  // clear code selection
  e.clearSelection();
});
// Attach a 'quarto'-themed Tippy popup to `el`. `contentFn` is handed to Tippy
// unchanged as its `content` option.
function tippyHover(el, contentFn) {
  window.tippy(el, {
    allowHTML: true,
    content: contentFn,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // mount the popup inside the parent of the element Tippy passes in
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start'
  });
}
// Footnote references: on hover, show the referenced note's HTML in a popup.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // prefer the explicit data attribute, fall back to the link target
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    if (!href) {
      // no target at all: nothing to show, and href.replace would throw
      return "";
    }
    // reduce an absolute URL to its fragment; other hrefs are kept as-is
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // getElementById yields null for a dangling reference; show an empty
    // popup instead of throwing inside the Tippy content callback
    return note ? note.innerHTML : "";
  });
}
// Citation references: on hover, show the bibliography entries named by the
// space-separated data-cites attribute of the enclosing citation element.
const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  // Look for data-cites on the parent first, then on further ancestors. A
  // link with no such ancestor is skipped rather than throwing on a null
  // attribute, which would leave the remaining links without popups.
  const citeHost = ref.parentElement ? ref.parentElement.closest('[data-cites]') : null;
  if (!citeHost) {
    continue;
  }
  const cites = citeHost.getAttribute('data-cites').split(' ');
  tippyHover(ref, function() {
    const popup = window.document.createElement('div');
    cites.forEach(function(cite) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent');
      citeDiv.classList.add('csl-entry');
      // entries absent from the bibliography stay as empty placeholders
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    });
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>