bayes-rules-notes/R/ch2.html

1561 lines
62 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.189">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Emanuel Rodriguez">
<title>Chapter 2 Notes</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="ch2_files/libs/clipboard/clipboard.min.js"></script>
<script src="ch2_files/libs/quarto-html/quarto.js"></script>
<script src="ch2_files/libs/quarto-html/popper.min.js"></script>
<script src="ch2_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="ch2_files/libs/quarto-html/anchor.min.js"></script>
<link href="ch2_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="ch2_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="ch2_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="ch2_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="ch2_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#likelihood" id="toc-likelihood" class="nav-link active" data-scroll-target="#likelihood">Likelihood</a></li>
<li><a href="#simualation" id="toc-simualation" class="nav-link" data-scroll-target="#simualation">Simulation</a></li>
<li><a href="#binomial-model-and-the-chess-example" id="toc-binomial-model-and-the-chess-example" class="nav-link" data-scroll-target="#binomial-model-and-the-chess-example">Binomial Model and the chess example</a>
<ul class="collapse">
<li><a href="#the-binomial-model" id="toc-the-binomial-model" class="nav-link" data-scroll-target="#the-binomial-model">The Binomial Model</a></li>
</ul></li>
</ul>
</nav>
</div>
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Chapter 2 Notes</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Emanuel Rodriguez </p>
</div>
</div>
</div>
</header>
<p><em>Note: these notes are a work in progress</em></p>
<p>In this chapter we step through an example of “fake” vs “real” news to build a framework for determining the probability that a new news article titled “The President has a secret!” is real vs fake.</p>
<p>We then go on to build a probability model known as the Binomial model using the Bayesian framework.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># libraries</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(bayesrules)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyr)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(gt)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tibble)</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(fake_news)</span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>fake_news <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">as_tibble</span>(fake_news)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What is the proportion of news articles that were labeled fake vs real?</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span> <span class="fu">head</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 6 × 30
title text url authors type title…¹ text_…² title…³ text_…⁴ title…⁵
&lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;fct&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt; &lt;int&gt;
1 Clinton's E… "0 S… http… &lt;NA&gt; fake 17 219 110 1444 0
2 Donald Trum… "\n\… http… &lt;NA&gt; real 18 509 95 3016 0
3 Michelle Ob… "Mic… http… Sierra… fake 16 494 96 2881 1
4 Trump hits … "“Cr… http… Jack S… real 11 268 60 1674 0
5 Australia V… "Whe… http… Blair … fake 9 479 54 2813 0
6 Its “Trump… "Lik… http… View A… real 12 220 66 1351 1
# … with 20 more variables: text_caps &lt;int&gt;, title_caps_percent &lt;dbl&gt;,
# text_caps_percent &lt;dbl&gt;, title_excl &lt;int&gt;, text_excl &lt;int&gt;,
# title_excl_percent &lt;dbl&gt;, text_excl_percent &lt;dbl&gt;, title_has_excl &lt;lgl&gt;,
# anger &lt;dbl&gt;, anticipation &lt;dbl&gt;, disgust &lt;dbl&gt;, fear &lt;dbl&gt;, joy &lt;dbl&gt;,
# sadness &lt;dbl&gt;, surprise &lt;dbl&gt;, trust &lt;dbl&gt;, negative &lt;dbl&gt;, positive &lt;dbl&gt;,
# text_syllables &lt;int&gt;, text_syllables_per_word &lt;dbl&gt;, and abbreviated
# variable names ¹title_words, ²text_words, ³title_char, ⁴text_char, …</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(),</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(fake_news)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 60 0.4
2 real 90 0.6</code></pre>
</div>
</div>
<p>If we let <span class="math inline">\(B\)</span> be the event that a news article is “fake” news, and <span class="math inline">\(B^c\)</span> be the event that a news article is “real”, we can write the following:</p>
<p><span class="math display">\[P(B) = .4\]</span> <span class="math display">\[P(B^c) = .6\]</span></p>
<p>This is the first “clue” or set of data that we have to build into our framework. Namely, the majority of articles are “real”, therefore we could simply predict that the new article is “real”. This updated sense of reality now becomes our priors.</p>
<p>Next we get additional data and update our priors based on it. The new observation we make is the use of exclamation marks “!”. We note that the use of “!” is more frequent in news articles labeled as “fake”. We will want to incorporate this into our framework to decide whether the new incoming article should be labelled as real or fake.</p>
<section id="likelihood" class="level3">
<h3 class="anchored" data-anchor-id="likelihood">Likelihood</h3>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Probability and Likelihood
</div>
</div>
<div class="callout-body-container callout-body">
<p>When the event <span class="math inline">\(B\)</span> is known, then we can evaluate the uncertainty of events <span class="math inline">\(A\)</span> and <span class="math inline">\(A^c\)</span> given <span class="math inline">\(B\)</span></p>
<p><span class="math display">\[P(A|B) \text{ vs } P(A^c|B)\]</span></p>
<p>If on the other hand, we know event <span class="math inline">\(A\)</span> then we can evaluate the relative compatibility of data <span class="math inline">\(A\)</span> with <span class="math inline">\(B\)</span> and <span class="math inline">\(B^c\)</span> using likelihood functions</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span> <span class="math display">\[=P(A|B) \text{ vs } P(A|B^c)\]</span></p>
</div>
</div>
<p>So in our case, we don’t know whether this new incoming article is real or not, but we do know that the title has an exclamation mark. This means we can evaluate how likely this article is real or not given that it contains an “!” in the title using likelihood functions. We can formulate this as:</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span></p>
<p>And perform the computation in R as follows:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># if fake, what are the proprotions of ! vs no-!</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="ot">&lt;-</span> fake_news <span class="sc">|&gt;</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type, title_has_excl) <span class="sc">|&gt;</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">|&gt;</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> title_has_excl,</span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="at">prop_within_type =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="sc">|&gt;</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> <span class="st">"type"</span>, <span class="at">values_from =</span> prop_within_type) <span class="sc">|&gt;</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>() <span class="sc">|&gt;</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_label</span>(</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> <span class="st">"Contains Exclamtion"</span>,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="at">fake =</span> <span class="st">"Fake"</span>, </span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="at">real =</span> <span class="st">"Real"</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">fmt_number</span>(<span class="at">columns=</span><span class="fu">c</span>(<span class="st">"fake"</span>, <span class="st">"real"</span>), <span class="at">decimals =</span> <span class="dv">3</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="ibvcfeegcr" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#ibvcfeegcr .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#ibvcfeegcr .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#ibvcfeegcr .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#ibvcfeegcr .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#ibvcfeegcr .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ibvcfeegcr .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#ibvcfeegcr .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#ibvcfeegcr .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#ibvcfeegcr .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#ibvcfeegcr .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#ibvcfeegcr .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#ibvcfeegcr .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#ibvcfeegcr .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#ibvcfeegcr .gt_from_md > :first-child {
margin-top: 0;
}
#ibvcfeegcr .gt_from_md > :last-child {
margin-bottom: 0;
}
#ibvcfeegcr .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#ibvcfeegcr .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#ibvcfeegcr .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#ibvcfeegcr .gt_row_group_first td {
border-top-width: 2px;
}
#ibvcfeegcr .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#ibvcfeegcr .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#ibvcfeegcr .gt_first_summary_row.thick {
border-top-width: 2px;
}
#ibvcfeegcr .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ibvcfeegcr .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#ibvcfeegcr .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#ibvcfeegcr .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#ibvcfeegcr .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#ibvcfeegcr .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#ibvcfeegcr .gt_footnote {
margin: 0px;
font-size: 90%;
padding-left: 4px;
padding-right: 4px;
padding-left: 5px;
padding-right: 5px;
}
#ibvcfeegcr .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#ibvcfeegcr .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#ibvcfeegcr .gt_left {
text-align: left;
}
#ibvcfeegcr .gt_center {
text-align: center;
}
#ibvcfeegcr .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#ibvcfeegcr .gt_font_normal {
font-weight: normal;
}
#ibvcfeegcr .gt_font_bold {
font-weight: bold;
}
#ibvcfeegcr .gt_font_italic {
font-style: italic;
}
#ibvcfeegcr .gt_super {
font-size: 65%;
}
#ibvcfeegcr .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#ibvcfeegcr .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#ibvcfeegcr .gt_indent_1 {
text-indent: 5px;
}
#ibvcfeegcr .gt_indent_2 {
text-indent: 10px;
}
#ibvcfeegcr .gt_indent_3 {
text-indent: 15px;
}
#ibvcfeegcr .gt_indent_4 {
text-indent: 20px;
}
#ibvcfeegcr .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed;; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col">Contains Exclamtion</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Fake</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Real</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_center">FALSE</td>
<td class="gt_row gt_right">0.733</td>
<td class="gt_row gt_right">0.978</td></tr>
<tr><td class="gt_row gt_center">TRUE</td>
<td class="gt_row gt_right">0.267</td>
<td class="gt_row gt_right">0.022</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The table above also shows the likelihoods for the case when an article does not contain an exclamation point in the title. It’s really important to note that these are likelihoods, and it’s not the case that <span class="math inline">\(L(B|A) + L(B^c|A) = 1\)</span>; as a matter of fact, this value evaluates to a number less than one. However, since we have that <span class="math inline">\(L(B|A) = .267\)</span> and <span class="math inline">\(L(B^c|A) = .022\)</span>, we have gained additional knowledge: the use of “!” in a title is more compatible with a fake news article than a real one.</p>
<p>Up to this point we can summarize our framework as follows</p>
<table class="table">
<thead>
<tr class="header">
<th>event</th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>prior</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
<tr class="even">
<td>likelihood</td>
<td>.267</td>
<td>.022</td>
<td>.289</td>
</tr>
</tbody>
</table>
<p>Our next goal is to come up with normalizing factors in order to build our probability table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>(1)</td>
<td>(2)</td>
<td></td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>(3)</td>
<td>(4)</td>
<td></td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>A couple of things to note about our table: (1) + (3) = .4, (2) + (4) = .6, and (1) + (2) + (3) + (4) = 1.</p>
<p>(1.) <span class="math inline">\(P(A \cap B) = P(A|B)P(B)\)</span> we know the likelihood of <span class="math inline">\(L(B|A) = P(A|B)\)</span> and we also know the prior so we insert these to get <span class="math display">\[ P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068\]</span></p>
<p>(3.) <span class="math inline">\(P(A^c \cap B) = P(A^c|B)P(B)\)</span> in this case we do know the prior <span class="math inline">\(P(B) = .4\)</span>, but we don’t directly know the value of <span class="math inline">\(P(A^c|B)\)</span>, however, we note that <span class="math inline">\(P(A|B) + P(A^c|B) = 1\)</span>, therefore we compute <span class="math inline">\(P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733\)</span> <span class="math display">\[ P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932\]</span></p>
<p>we now can confirm that <span class="math inline">\(.1068 + .2932 = .4\)</span></p>
<p>Moving on to (2), (4)</p>
<p>(2.) <span class="math inline">\(P(A \cap B^c) = P(A|B^c)P(B^c)\)</span>. In this case we know the likelihood <span class="math inline">\(L(B^c|A) = P(A|B^c)\)</span> and we know the prior <span class="math inline">\(P(B^c)\)</span> therefore, <span class="math display">\[P(A \cap B^c) = P(A|B^c)P(B^c) = .022 \times .6 = .0132\]</span></p>
<p>(4.) <span class="math inline">\(P(A^c \cap B^c) = P(A^c|B^c)P(B^c) = (1 - .022) \times .6 = .5868\)</span></p>
<p>and can confirm that <span class="math inline">\(.0132 + .5868 = .6\)</span></p>
<p>and we can fill the rest of the table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>.1068</td>
<td>.0132</td>
<td>.12</td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>.2932</td>
<td>.5868</td>
<td>.88</td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>An important concept we implemented above is the idea of <strong>total probability</strong></p>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
total probability
</div>
</div>
<div class="callout-body-container callout-body">
<p>The <strong>total probability</strong> of observing a real article is made up of the sum of its parts. Namely</p>
<p><span class="math display">\[P(B^c) = P(A \cap B^c) + P(A^c \cap B^c)\]</span> <span class="math display">\[=P(A|B^c)P(B^c) + P(A^c|B^c)P(B^c)\]</span> <span class="math display">\[=.0132 + .5868 = .6\]</span></p>
</div>
</div>
<p>In the above calculations we also step through <strong>joint probabilities</strong></p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Joint and conditional probability
</div>
</div>
<div class="callout-body-container callout-body">
<p><span class="math display">\[P(A \cap B) = P(A|B)P(B)\]</span></p>
<p><span class="math inline">\(A\)</span> and <span class="math inline">\(B\)</span> are said to be independent events, if and only if</p>
<p><span class="math display">\[P(A \cap B) = P(A)P(B)\]</span></p>
<p>from this we can also derive the definition of a conditional probability</p>
<p><span class="math display">\[P(A|B) = \frac{P(A \cap B)}{P(B)}\]</span></p>
</div>
</div>
<p>At this point we are able to answer the question, “What is the probability that the new article is fake?”. Given that the new article has an exclamation point, we can zoom into the top row of the table of probabilities. Within this row we have probabilities <span class="math inline">\(.1068/.12 = .89\)</span> for fake and <span class="math inline">\(.0132 / .12 = .11\)</span> for real.</p>
<p>This is essentially Bayes Rule. We developed a posterior probability for an event <span class="math inline">\(B\)</span> given some observation <span class="math inline">\(A\)</span>. We did so by combining the likelihood of event <span class="math inline">\(B\)</span> given some new data <span class="math inline">\(A\)</span> and the prior probability of event <span class="math inline">\(B\)</span>. More formally we have the following definition:</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Bayes Rule
</div>
</div>
<div class="callout-body-container callout-body">
<p>The posterior probability of an event <span class="math inline">\(B\)</span> given <span class="math inline">\(A\)</span> is:</p>
<p><span class="math display">\[ P(B|A) = \frac{P(A \cap B)}{P(A)} = \frac{L(B|A)P(B)}{P(A)}\]</span></p>
<p>where <span class="math inline">\(L\)</span> is the likelihood function <span class="math inline">\(L(B|A) = P(A|B)\)</span> and <span class="math inline">\(P(A)\)</span> is the total probability of <span class="math inline">\(A\)</span>.</p>
<p>More generally,</p>
<p><span class="math display">\[ \text{posterior} = \frac{\text{likelihood} \cdot \text{prior}}{\text{normalizing constant}}\]</span></p>
</div>
</div>
</section>
<section id="simualation" class="level3">
<h3 class="anchored" data-anchor-id="simualation">Simulation</h3>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>articles <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"real"</span>, <span class="st">"fake"</span>))</span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>priors <span class="ot">&lt;-</span> <span class="fu">c</span>(.<span class="dv">6</span>, .<span class="dv">4</span>)</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> <span class="fu">sample_n</span>(articles, <span class="dv">10000</span>, <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">weight =</span> priors)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type)) <span class="sc">+</span> <span class="fu">geom_bar</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-6-1.png" class="img-fluid" width="672" alt="Bar chart of the number of simulated articles of each type (fake and real)"></p>
</div>
</div>
<p>and a summary table</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(), </span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(articles_sim)</span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>()<span class="sc">|&gt;</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="dpxebxbyvj" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#dpxebxbyvj .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#dpxebxbyvj .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#dpxebxbyvj .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#dpxebxbyvj .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#dpxebxbyvj .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#dpxebxbyvj .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#dpxebxbyvj .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#dpxebxbyvj .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#dpxebxbyvj .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#dpxebxbyvj .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#dpxebxbyvj .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#dpxebxbyvj .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#dpxebxbyvj .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#dpxebxbyvj .gt_from_md > :first-child {
margin-top: 0;
}
#dpxebxbyvj .gt_from_md > :last-child {
margin-bottom: 0;
}
#dpxebxbyvj .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#dpxebxbyvj .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#dpxebxbyvj .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#dpxebxbyvj .gt_row_group_first td {
border-top-width: 2px;
}
#dpxebxbyvj .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#dpxebxbyvj .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#dpxebxbyvj .gt_first_summary_row.thick {
border-top-width: 2px;
}
#dpxebxbyvj .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#dpxebxbyvj .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#dpxebxbyvj .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#dpxebxbyvj .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#dpxebxbyvj .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#dpxebxbyvj .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#dpxebxbyvj .gt_footnote {
  margin: 0px;
  font-size: 90%;
  /* The 4px value was emitted as a second left/right pair that the 5px
     declarations below immediately overrode, so it never applied. Use it as
     vertical padding, matching the top/bottom + left/right layout of the
     .gt_sourcenote rule in this stylesheet. */
  padding-top: 4px;
  padding-bottom: 4px;
  padding-left: 5px;
  padding-right: 5px;
}
#dpxebxbyvj .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#dpxebxbyvj .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#dpxebxbyvj .gt_left {
text-align: left;
}
#dpxebxbyvj .gt_center {
text-align: center;
}
#dpxebxbyvj .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#dpxebxbyvj .gt_font_normal {
font-weight: normal;
}
#dpxebxbyvj .gt_font_bold {
font-weight: bold;
}
#dpxebxbyvj .gt_font_italic {
font-style: italic;
}
#dpxebxbyvj .gt_super {
font-size: 65%;
}
#dpxebxbyvj .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#dpxebxbyvj .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#dpxebxbyvj .gt_indent_1 {
text-indent: 5px;
}
#dpxebxbyvj .gt_indent_2 {
text-indent: 10px;
}
#dpxebxbyvj .gt_indent_3 {
text-indent: 15px;
}
#dpxebxbyvj .gt_indent_4 {
text-indent: 20px;
}
#dpxebxbyvj .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed;; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_left" rowspan="1" colspan="1" scope="col">type</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">total</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">prop</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_left">fake</td>
<td class="gt_row gt_right">4031</td>
<td class="gt_row gt_right">0.4031</td></tr>
<tr><td class="gt_row gt_left">real</td>
<td class="gt_row gt_right">5969</td>
<td class="gt_row gt_right">0.5969</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>the simulation of 10,000 articles shows us very nearly the same priors we had from the data. We can now add the exclamation usage into the data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">model_data =</span> <span class="fu">case_when</span>(</span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"fake"</span> <span class="sc">~</span> .<span class="dv">267</span>, </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"real"</span> <span class="sc">~</span> .<span class="dv">022</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> ))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The plan here is to iterate through the 10,000 samples and use the <code>model_data</code> value to assign either “yes” or “no” using the <code>sample</code> function.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>data <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"yes"</span>, <span class="st">"no"</span>)</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">id =</span> <span class="fu">row_number</span>()) <span class="sc">|&gt;</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(id) <span class="sc">|&gt;</span></span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">usage =</span> <span class="fu">sample</span>(data, <span class="dv">1</span>, <span class="at">prob =</span> <span class="fu">c</span>(model_data, <span class="dv">1</span> <span class="sc">-</span> model_data)))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(usage, type) <span class="sc">|&gt;</span></span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> type, <span class="at">values_from =</span> total)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
# Groups: usage [2]
usage fake real
&lt;chr&gt; &lt;int&gt; &lt;int&gt;
1 no 2955 5845
2 yes 1076 124</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type, <span class="at">fill =</span> usage)) <span class="sc">+</span> </span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>() <span class="sc">+</span> </span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_discrete</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"gray8"</span>, <span class="st">"dodgerblue4"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
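<p><img src="ch2_files/figure-html/unnamed-chunk-11-1.png" class="img-fluid" width="672" alt="Stacked bar chart of simulated article counts by type, filled by exclamation point usage (yes or no)"></p>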
</div>
</div>
<p>So far we have computed both the priors and likelihoods, so we can simply filter our data to reflect the incoming article and determine our posterior.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(usage <span class="sc">==</span> <span class="st">"yes"</span>) <span class="sc">|&gt;</span></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;chr&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 1076 0.897
2 real 124 0.103</code></pre>
</div>
</div>
</section>
<section id="binomial-model-and-the-chess-example" class="level2">
<h2 class="anchored" data-anchor-id="binomial-model-and-the-chess-example">Binomial Model and the chess example</h2>
<p>The example used here is the case of a chess match between a human and a computer “Deep Blue”. The setup is such that we know the two faced each other in 1996, in a match which the human won. There is a rematch scheduled for the next year, 1997. We would like to model the number of games out of 6 that the human can win.</p>
<p>Let <span class="math inline">\(\pi\)</span> be the probability that the human wins any one game against the computer. To simplify things greatly we assume that <span class="math inline">\(\pi\)</span> takes on values of .2, .5, .8. We also assume the following prior (we are told in the book that we will learn how to build these later on):</p>
<table class="table">
<thead>
<tr class="header">
<th><span class="math inline">\(\pi\)</span></th>
<th>.2</th>
<th>.5</th>
<th>.8</th>
<th>total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(f(\pi)\)</span></td>
<td>.10</td>
<td>.25</td>
<td>.65</td>
<td>1</td>
</tr>
</tbody>
</table>
<div class="callout-caution callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Note
</div>
</div>
<div class="callout-body-container callout-body">
<p>It’s important to note here that the values of <span class="math inline">\(\pi\)</span> <strong>do not</strong> add up to 1. <span class="math inline">\(\pi\)</span> represents the chances of winning any single game, so we would expect <span class="math inline">\(\pi\)</span> to take on any value in <span class="math inline">\([0, 1]\)</span>. On the other hand <span class="math inline">\(f\)</span> is a function that maps <span class="math inline">\(\pi\)</span> into a space of probabilities; this is defined next.</p>
</div>
</div>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Discrete Probability Model
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let <span class="math inline">\(Y\)</span> be a discrete random variable. The probability model for <span class="math inline">\(Y\)</span> is described by a <strong>probability mass function</strong> (pmf) defined as: <span class="math display">\[f(y) = P(Y = y)\]</span></p>
<p>and has the following properties</p>
<ol type="1">
<li><span class="math inline">\(0 \leq f(y) \leq 1\;\; \forall y\)</span></li>
<li><span class="math inline">\(\sum_{\forall y}f(y) = 1\)</span></li>
</ol>
</div>
</div>
<div class="callout-caution callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
In Emanuel’s words
</div>
</div>
<div class="callout-body-container callout-body">
<p>What does this mean? Well, it’s very straightforward: a pmf is a function that takes in some value <span class="math inline">\(y\)</span> and outputs the probability that the random variable <span class="math inline">\(Y\)</span> equals <span class="math inline">\(y\)</span>.</p>
</div>
</div>
<section id="the-binomial-model" class="level3">
<h3 class="anchored" data-anchor-id="the-binomial-model">The Binomial Model</h3>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Conditional probability model of data <span class="math inline">\(Y\)</span>
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let <span class="math inline">\(Y\)</span> be a discrete random variable that depends on some parameter <span class="math inline">\(\pi\)</span>. We define the conditional probability model of <span class="math inline">\(Y\)</span> as the conditional pmf,</p>
<p><span class="math display">\[f(y|\pi) = P(Y = y | \pi)\]</span></p>
<p>and has the following properties,</p>
<ol type="1">
<li><span class="math inline">\(0 \leq f(y|\pi) \leq 1\;\; \forall y\)</span></li>
<li><span class="math inline">\(\sum_{\forall y}f(y|\pi) = 1\)</span></li>
</ol>
</div>
</div>
<div class="callout-caution callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
In Emanuel’s words
</div>
</div>
<div class="callout-body-container callout-body">
<p>This is essentially the same probability model we had defined above, except now we are conditioning the probabilities on some parameter <span class="math inline">\(\pi\)</span>.</p>
</div>
</div>
</section>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror the colour mode declared on a stylesheet element (its `data-mode`
// attribute) onto <body>: "dark" yields the `quarto-dark` class, anything
// else yields `quarto-light`; the opposite class is removed.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  bodyClasses.toggle("quarto-dark", isDark);
  bodyClasses.toggle("quarto-light", !isDark);
};
// Apply the mode of the `link#quarto-bootstrap` stylesheet, when the page has one.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
toggleBodyColorPrimary();
const icon = "";
// Register every `.anchored` element with AnchorJS, using the `icon` glyph
// defined above and 'right' placement.
const anchors = new window.AnchorJS();
anchors.options = { placement: 'right', icon: icon };
anchors.add('.anchored');
// Copy buttons: the copy target is the element immediately before the button.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  target: (trigger) => trigger.previousElementSibling
});
clipboard.on('success', (e) => {
  const copyButton = e.trigger;
  // Drop focus from the button once the copy has succeeded.
  copyButton.blur();
  // Show the "checked" class and a confirmation title for one second, then
  // restore the title the button had before.
  const originalTitle = copyButton.getAttribute("title");
  copyButton.classList.add('code-copy-button-checked');
  copyButton.setAttribute("title", "Copied!");
  setTimeout(() => {
    copyButton.setAttribute("title", originalTitle);
    copyButton.classList.remove('code-copy-button-checked');
  }, 1000);
  // Clear the selection made for the copy.
  e.clearSelection();
});
// Attach a tippy tooltip to `el`. `contentFn` is passed to tippy as the
// tooltip's `content`, and the tooltip is appended to the trigger's parent.
function tippyHover(el, contentFn) {
  const config = {
    allowHTML: true,
    content: contentFn,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start'
  };
  window.tippy(el, config);
}
// Footnote references: the tooltip shows the HTML of the footnote the link
// points at.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (let i = 0; i < noterefs.length; i++) {
  const ref = noterefs[i];
  tippyHover(ref, function() {
    // Prefer the data attribute; fall back to the link's own href.
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    if (!href) {
      // No target recorded on the link: nothing to show (previously a TypeError).
      return "";
    }
    // Absolute URLs are reduced to their fragment; anything that does not
    // parse as a URL (e.g. a bare "#fn1") is used as-is.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // getElementById returns null when the footnote is not in the page.
    return note ? note.innerHTML : "";
  });
}
// Citation references: the tooltip lists the bibliography entry for each key
// in the parent element's space-separated `data-cites` attribute.
const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (let i = 0; i < bibliorefs.length; i++) {
  const ref = bibliorefs[i];
  // A missing attribute used to throw here and abort the rest of the setup.
  const citesAttr = ref.parentNode.getAttribute('data-cites');
  const cites = citesAttr ? citesAttr.split(' ') : [];
  tippyHover(ref, function() {
    const popup = window.document.createElement('div');
    cites.forEach(function(cite) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent');
      citeDiv.classList.add('csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    });
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>