bayes-rules-notes/R/ch2.html

1489 lines
58 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.189">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Emanuel Rodriguez">
<title>Chapter 2 Notes</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="ch2_files/libs/clipboard/clipboard.min.js"></script>
<script src="ch2_files/libs/quarto-html/quarto.js"></script>
<script src="ch2_files/libs/quarto-html/popper.min.js"></script>
<script src="ch2_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="ch2_files/libs/quarto-html/anchor.min.js"></script>
<link href="ch2_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="ch2_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="ch2_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="ch2_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="ch2_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<link rel="stylesheet" href="styles.css">
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Chapter 2 Notes</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Emanuel Rodriguez </p>
</div>
</div>
</div>
</header>
<p>In this chapter we step through an example of “fake” vs “real” news to build a framework to determine the probability of real vs fake of a new news article titled “The President has a secret!”</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># libraries</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(bayesrules)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyr)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(gt)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tibble)</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(fake_news)</span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>fake_news <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">as_tibble</span>(fake_news)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What is the proportion of news articles that were labeled fake vs real?</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span> <span class="fu">glimpse</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 150
Columns: 30
$ title &lt;chr&gt; "Clinton's Exploited Haiti Earthquake to Stea…
$ text &lt;chr&gt; "0 SHARES Facebook Twitter\n\nBernard Sansaric…
$ url &lt;chr&gt; "http://freedomdaily.com/former-haitian-senate…
$ authors &lt;chr&gt; NA, NA, "Sierra Marlee", "Jack Shafer,Nolan D"…
$ type &lt;fct&gt; fake, real, fake, real, fake, real, fake, fake…
$ title_words &lt;int&gt; 17, 18, 16, 11, 9, 12, 11, 18, 10, 13, 10, 11,…
$ text_words &lt;int&gt; 219, 509, 494, 268, 479, 220, 184, 500, 677, 4…
$ title_char &lt;int&gt; 110, 95, 96, 60, 54, 66, 86, 104, 66, 81, 59, …
$ text_char &lt;int&gt; 1444, 3016, 2881, 1674, 2813, 1351, 1128, 3112…
$ title_caps &lt;int&gt; 0, 0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0…
$ text_caps &lt;int&gt; 1, 1, 3, 3, 0, 0, 0, 12, 12, 1, 2, 5, 1, 1, 6,…
$ title_caps_percent &lt;dbl&gt; 0.000000, 0.000000, 6.250000, 0.000000, 0.0000…
$ text_caps_percent &lt;dbl&gt; 0.4566210, 0.1964637, 0.6072874, 1.1194030, 0.…
$ title_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ text_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0…
$ title_excl_percent &lt;dbl&gt; 0.0000000, 0.0000000, 2.0833333, 0.0000000, 0.…
$ text_excl_percent &lt;dbl&gt; 0.00000000, 0.00000000, 0.06942034, 0.00000000…
$ title_has_excl &lt;lgl&gt; FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE…
$ anger &lt;dbl&gt; 4.24, 2.28, 1.18, 4.66, 0.82, 1.29, 2.56, 3.47…
$ anticipation &lt;dbl&gt; 2.12, 1.71, 2.16, 1.79, 1.23, 0.43, 2.05, 1.74…
$ disgust &lt;dbl&gt; 2.54, 1.90, 0.98, 1.79, 0.41, 1.72, 2.05, 1.35…
$ fear &lt;dbl&gt; 3.81, 1.90, 1.57, 4.30, 0.82, 0.43, 5.13, 4.25…
$ joy &lt;dbl&gt; 1.27, 1.71, 1.96, 0.36, 1.23, 0.86, 1.54, 1.35…
$ sadness &lt;dbl&gt; 4.66, 1.33, 0.78, 1.79, 0.82, 0.86, 2.05, 1.93…
$ surprise &lt;dbl&gt; 2.12, 1.14, 1.18, 1.79, 0.82, 0.86, 1.03, 1.35…
$ trust &lt;dbl&gt; 2.97, 4.17, 3.73, 2.51, 2.46, 2.16, 5.13, 3.86…
$ negative &lt;dbl&gt; 8.47, 4.74, 3.33, 6.09, 2.66, 3.02, 4.10, 4.63…
$ positive &lt;dbl&gt; 3.81, 4.93, 5.49, 2.15, 4.30, 2.16, 4.10, 4.25…
$ text_syllables &lt;int&gt; 395, 845, 806, 461, 761, 376, 326, 891, 1133, …
$ text_syllables_per_word &lt;dbl&gt; 1.803653, 1.660118, 1.631579, 1.720149, 1.5887…</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(),</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(fake_news)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 60 0.4
2 real 90 0.6</code></pre>
</div>
</div>
<p>If we let <span class="math inline">\(B\)</span> be the event that a news article is “fake” news, and <span class="math inline">\(B^c\)</span> be the event that a news article is “real”, we can write the following:</p>
<p><span class="math display">\[P(B) = .4\]</span> <span class="math display">\[P(B^c) = .6\]</span></p>
<p>This is the first “clue” or set of data that we have to build into our framework. Namely, the majority of articles are “real”, therefore we could simply predict that the new article is “real”. This updated sense of reality now becomes our prior.</p>
<p>Next we gather additional data and update our priors based on it. The new observation we make is the use of exclamation marks “!”. We note that the use of “!” is more frequent in news articles labeled as “fake”. We will want to incorporate this into our framework to decide whether the new incoming article should be labelled as real or fake.</p>
<section id="likelihood" class="level3">
<h3 class="anchored" data-anchor-id="likelihood">Likelihood</h3>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Probability and Likelihood
</div>
</div>
<div class="callout-body-container callout-body">
<p>When the event <span class="math inline">\(B\)</span> is known, then we can evaluate the uncertainty of events <span class="math inline">\(A\)</span> and <span class="math inline">\(A^c\)</span> given <span class="math inline">\(B\)</span></p>
<p><span class="math display">\[P(A|B) \text{ vs } P(A^c|B)\]</span></p>
<p>If, on the other hand, we know event <span class="math inline">\(A\)</span> then we can evaluate the relative compatibility of data <span class="math inline">\(A\)</span> with <span class="math inline">\(B\)</span> and <span class="math inline">\(B^c\)</span> using likelihood functions</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span> <span class="math display">\[=P(A|B) \text{ vs } P(A|B^c)\]</span></p>
</div>
</div>
<p>So in our case, we don’t know whether this new incoming article is real or not, but we do know that the title has an exclamation mark. This means we can evaluate how likely this article is real or not given that it contains an “!” in the title using likelihood functions. We can formulate this as:</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span></p>
<p>And perform the computation in R as follows:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># if fake, what are the proprotions of ! vs no-!</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="ot">&lt;-</span> fake_news <span class="sc">|&gt;</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type, title_has_excl) <span class="sc">|&gt;</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">|&gt;</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> title_has_excl,</span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="at">prop_within_type =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="sc">|&gt;</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> <span class="st">"type"</span>, <span class="at">values_from =</span> prop_within_type) <span class="sc">|&gt;</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>() <span class="sc">|&gt;</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_label</span>(</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> <span class="st">"Contains Exclamtion"</span>,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="at">fake =</span> <span class="st">"Fake"</span>, </span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="at">real =</span> <span class="st">"Real"</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">fmt_number</span>(<span class="at">columns=</span><span class="fu">c</span>(<span class="st">"fake"</span>, <span class="st">"real"</span>), <span class="at">decimals =</span> <span class="dv">3</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="cgeetizxio" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#cgeetizxio .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#cgeetizxio .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#cgeetizxio .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#cgeetizxio .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#cgeetizxio .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#cgeetizxio .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#cgeetizxio .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#cgeetizxio .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#cgeetizxio .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#cgeetizxio .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#cgeetizxio .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#cgeetizxio .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#cgeetizxio .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#cgeetizxio .gt_from_md > :first-child {
margin-top: 0;
}
#cgeetizxio .gt_from_md > :last-child {
margin-bottom: 0;
}
#cgeetizxio .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#cgeetizxio .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#cgeetizxio .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#cgeetizxio .gt_row_group_first td {
border-top-width: 2px;
}
#cgeetizxio .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#cgeetizxio .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#cgeetizxio .gt_first_summary_row.thick {
border-top-width: 2px;
}
#cgeetizxio .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#cgeetizxio .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#cgeetizxio .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#cgeetizxio .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#cgeetizxio .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#cgeetizxio .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#cgeetizxio .gt_footnote {
margin: 0px;
font-size: 90%;
padding-left: 4px;
padding-right: 4px;
padding-left: 5px;
padding-right: 5px;
}
#cgeetizxio .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#cgeetizxio .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#cgeetizxio .gt_left {
text-align: left;
}
#cgeetizxio .gt_center {
text-align: center;
}
#cgeetizxio .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#cgeetizxio .gt_font_normal {
font-weight: normal;
}
#cgeetizxio .gt_font_bold {
font-weight: bold;
}
#cgeetizxio .gt_font_italic {
font-style: italic;
}
#cgeetizxio .gt_super {
font-size: 65%;
}
#cgeetizxio .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#cgeetizxio .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#cgeetizxio .gt_indent_1 {
text-indent: 5px;
}
#cgeetizxio .gt_indent_2 {
text-indent: 10px;
}
#cgeetizxio .gt_indent_3 {
text-indent: 15px;
}
#cgeetizxio .gt_indent_4 {
text-indent: 20px;
}
#cgeetizxio .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed;; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col">Contains Exclamtion</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Fake</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Real</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_center">FALSE</td>
<td class="gt_row gt_right">0.733</td>
<td class="gt_row gt_right">0.978</td></tr>
<tr><td class="gt_row gt_center">TRUE</td>
<td class="gt_row gt_right">0.267</td>
<td class="gt_row gt_right">0.022</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The table above also shows the likelihoods for the case when an article does not contain an exclamation point in the title as well. It’s really important to note that these are likelihoods, and it’s not the case that <span class="math inline">\(L(B|A) + L(B^c|A) = 1\)</span>; as a matter of fact, this value evaluates to a number less than one. However, since we have that <span class="math inline">\(L(B|A) = .267\)</span> and <span class="math inline">\(L(B^c|A) = .022\)</span> then we have gained additional knowledge in knowing the use of “!” in a title is more compatible with a fake news article than a real one.</p>
<p>Up to this point we can summarize our framework as follows</p>
<table class="table">
<thead>
<tr class="header">
<th>event</th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>prior</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
<tr class="even">
<td>likelihood</td>
<td>.267</td>
<td>.022</td>
<td>.289</td>
</tr>
</tbody>
</table>
<p>Our next goal is to come up with normalizing factors in order to build our probability table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>(1)</td>
<td>(2)</td>
<td></td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>(3)</td>
<td>(4)</td>
<td></td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>A couple things to note about our table (1) + (3) = .4 and (2) + (4) = .6. (1) + (2) + (3) + (4) = 1.</p>
<p>(1.) <span class="math inline">\(P(A \cap B) = P(A|B)P(B)\)</span> we know the likelihood of <span class="math inline">\(L(B|A) = P(A|B)\)</span> and we also know the prior so we insert these to get <span class="math display">\[ P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068\]</span></p>
<p>(3.) <span class="math inline">\(P(A^c \cap B) = P(A^c|B)P(B)\)</span> in this case we do know the prior <span class="math inline">\(P(B) = .4\)</span>, but we don’t directly know the value of <span class="math inline">\(P(A^c|B)\)</span>, however, we note that <span class="math inline">\(P(A|B) + P(A^c|B) = 1\)</span>, therefore we compute <span class="math inline">\(P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733\)</span> <span class="math display">\[ P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932\]</span></p>
<p>we now can confirm that <span class="math inline">\(.1068 + .2932 = .4\)</span></p>
<p>Moving on to (2), (4)</p>
<p>(2.) <span class="math inline">\(P(A \cap B^c) = P(A|B^c)P(B^c)\)</span>. In this case we know the likelihood <span class="math inline">\(L(B^c|A) = P(A|B^c)\)</span> and we know the prior <span class="math inline">\(P(B^c)\)</span> therefore, <span class="math display">\[P(A \cap B^c) = P(A|B^c)P(B^c) = .022 \times .6 = .0132\]</span></p>
<p>(4.) <span class="math inline">\(P(A^c \cap B^c) = P(A^c|B^c)P(B^c) = (1 - .022) \times .6 = .5868\)</span></p>
<p>and can confirm that <span class="math inline">\(.0132 + .5868 = .6\)</span></p>
<p>and we can fill the rest of the table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>.1068</td>
<td>.0132</td>
<td>.12</td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>.2932</td>
<td>.5868</td>
<td>.88</td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>An important concept we implemented above is the idea of <strong>total probability</strong></p>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
total probability
</div>
</div>
<div class="callout-body-container callout-body">
<p>The <strong>total probability</strong> of observing a real article is made up of the sum of its parts. Namely</p>
<p><span class="math display">\[P(B^c) = P(A \cap B^c) + P(A^c \cap B^c)\]</span> <span class="math display">\[=P(A|B^c)P(B^c) + P(A^c|B^c)P(B^c)\]</span> <span class="math display">\[=.0132 + .5868 = .6\]</span></p>
</div>
</div>
<p>In the above calculations we also step through <strong>joint probabilities</strong></p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Joint and conditional probability
</div>
</div>
<div class="callout-body-container callout-body">
<p><span class="math display">\[P(A \cap B) = P(A|B)P(B)\]</span></p>
<p><span class="math inline">\(A\)</span> and <span class="math inline">\(B\)</span> are said to be independent events, if and only if</p>
<p><span class="math display">\[P(A \cap B) = P(A)P(B)\]</span></p>
<p>from this we can also derive the definition of a conditional probability</p>
<p><span class="math display">\[P(A|B) = \frac{P(A \cap B)}{P(B)}\]</span></p>
</div>
</div>
<p>At this point we are able to answer the question, “What is the probability that the new article is fake?”. Given that the new article has an exclamation point, we can zoom into the top row of the table of probabilities. Within this row we have probabilities <span class="math inline">\(.1068/.12 = .89\)</span> for fake and <span class="math inline">\(.0132 / .12 = .11\)</span> for real.</p>
<p>This is essentially Bayes Rule. We developed a posterior probability for an event <span class="math inline">\(B\)</span> given some observation <span class="math inline">\(A\)</span>. We did so by combining the likelihood of event <span class="math inline">\(B\)</span> given some new data <span class="math inline">\(A\)</span> and the prior probability of event <span class="math inline">\(B\)</span>. More formally we have the following definition:</p>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Bayes Rule
</div>
</div>
<div class="callout-body-container callout-body">
<p>The posterior probability of an event <span class="math inline">\(B\)</span> given an event <span class="math inline">\(A\)</span> is:</p>
<p><span class="math display">\[ P(B|A) = \frac{P(A \cap B)}{P(A)} = \frac{L(B|A)P(B)}{P(A)}\]</span></p>
<p>where <span class="math inline">\(L\)</span> is the likelihood function <span class="math inline">\(L(B|A) = P(A|B)\)</span> and <span class="math inline">\(P(A)\)</span> is the total probability of <span class="math inline">\(A\)</span>.</p>
<p>More generally,</p>
<p><span class="math display">\[ \text{posterior} = \frac{\text{likelihood} \cdot \text{prior}}{\text{normalizing constant}}\]</span></p>
</div>
</div>
</section>
<section id="simualation" class="level3">
<h3 class="anchored" data-anchor-id="simualation">Simulation</h3>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>articles <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">tibble</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"real"</span>, <span class="st">"fake"</span>))</span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>priors <span class="ot">&lt;-</span> <span class="fu">c</span>(.<span class="dv">6</span>, .<span class="dv">4</span>)</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> <span class="fu">sample_n</span>(articles, <span class="dv">10000</span>, <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">weight =</span> priors)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type)) <span class="sc">+</span> <span class="fu">geom_bar</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="ch2_files/figure-html/unnamed-chunk-6-1.png" class="img-fluid" width="672" alt="Bar chart of the number of simulated articles of each type (fake, real)"></p>
</div>
</div>
<p>and a summary table</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb10"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(), </span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(articles_sim)</span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>()<span class="sc">|&gt;</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="riybaxjrki" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
#riybaxjrki .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
#riybaxjrki .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#riybaxjrki .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#riybaxjrki .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#riybaxjrki .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#riybaxjrki .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#riybaxjrki .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#riybaxjrki .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#riybaxjrki .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#riybaxjrki .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#riybaxjrki .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
#riybaxjrki .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#riybaxjrki .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
#riybaxjrki .gt_from_md > :first-child {
margin-top: 0;
}
#riybaxjrki .gt_from_md > :last-child {
margin-bottom: 0;
}
#riybaxjrki .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
#riybaxjrki .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#riybaxjrki .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#riybaxjrki .gt_row_group_first td {
border-top-width: 2px;
}
#riybaxjrki .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#riybaxjrki .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#riybaxjrki .gt_first_summary_row.thick {
border-top-width: 2px;
}
#riybaxjrki .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#riybaxjrki .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#riybaxjrki .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
#riybaxjrki .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#riybaxjrki .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#riybaxjrki .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
/* Footnote text cell. Vertical padding is 4px and horizontal padding is 5px,
   matching the .gt_sourcenote rule. The 4px pair was previously declared as
   padding-left/right and immediately overridden by the 5px pair, so it had
   no effect. */
#riybaxjrki .gt_footnote {
  margin: 0px;
  font-size: 90%;
  padding-top: 4px;
  padding-bottom: 4px;
  padding-left: 5px;
  padding-right: 5px;
}
#riybaxjrki .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#riybaxjrki .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
#riybaxjrki .gt_left {
text-align: left;
}
#riybaxjrki .gt_center {
text-align: center;
}
#riybaxjrki .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
#riybaxjrki .gt_font_normal {
font-weight: normal;
}
#riybaxjrki .gt_font_bold {
font-weight: bold;
}
#riybaxjrki .gt_font_italic {
font-style: italic;
}
#riybaxjrki .gt_super {
font-size: 65%;
}
#riybaxjrki .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#riybaxjrki .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
#riybaxjrki .gt_indent_1 {
text-indent: 5px;
}
#riybaxjrki .gt_indent_2 {
text-indent: 10px;
}
#riybaxjrki .gt_indent_3 {
text-indent: 15px;
}
#riybaxjrki .gt_indent_4 {
text-indent: 20px;
}
#riybaxjrki .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed;; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_left" rowspan="1" colspan="1" scope="col">type</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">total</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">prop</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_left">fake</td>
<td class="gt_row gt_right">3941</td>
<td class="gt_row gt_right">0.3941</td></tr>
<tr><td class="gt_row gt_left">real</td>
<td class="gt_row gt_right">6059</td>
<td class="gt_row gt_right">0.6059</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The simulation of 10,000 articles shows us very nearly the same priors we had from the data. We can now add the exclamation usage into the data.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">model_data =</span> <span class="fu">case_when</span>(</span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"fake"</span> <span class="sc">~</span> .<span class="dv">267</span>, </span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> type <span class="sc">==</span> <span class="st">"real"</span> <span class="sc">~</span> .<span class="dv">022</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> ))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>The plan here is to iterate through the 10,000 samples and use the <code>model_data</code> value to assign either “yes” or “no” using the <code>sample</code> function.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb12"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>data <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">"yes"</span>, <span class="st">"no"</span>)</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="ot">&lt;-</span> articles_sim <span class="sc">|&gt;</span></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">id =</span> <span class="fu">row_number</span>()) <span class="sc">|&gt;</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(id) <span class="sc">|&gt;</span></span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(<span class="at">usage =</span> <span class="fu">sample</span>(data, <span class="dv">1</span>, <span class="at">prob =</span> <span class="fu">c</span>(model_data, <span class="dv">1</span> <span class="sc">-</span> model_data)))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(usage, type) <span class="sc">|&gt;</span></span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> type, <span class="at">values_from =</span> total)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
# Groups: usage [2]
usage fake real
&lt;chr&gt; &lt;int&gt; &lt;int&gt;
1 no 2936 5932
2 yes 1005 127</code></pre>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(<span class="at">x =</span> type, <span class="at">fill =</span> usage)) <span class="sc">+</span> </span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>() <span class="sc">+</span> </span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">scale_fill_discrete</span>(<span class="at">type =</span> <span class="fu">c</span>(<span class="st">"gray8"</span>, <span class="st">"dodgerblue4"</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
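<p><img src="ch2_files/figure-html/unnamed-chunk-11-1.png" class="img-fluid" width="672" alt="Stacked bar chart of simulated article counts by type, filled by exclamation point usage (yes or no)"></p>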
</div>
</div>
<p>So far we have computed both the priors and the likelihoods, so we can simply filter our data to reflect the incoming article and determine our posterior.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>articles_sim <span class="sc">|&gt;</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(usage <span class="sc">==</span> <span class="st">"yes"</span>) <span class="sc">|&gt;</span></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">mutate</span>(</span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;chr&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 1005 0.888
2 real 127 0.112</code></pre>
</div>
</div>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Discrete Probability Model
</div>
</div>
<div class="callout-body-container callout-body">
<p>Let <span class="math inline">\(Y\)</span> be a discrete random variable. The probability model for <span class="math inline">\(Y\)</span> is described by a <strong>probability mass function</strong> (pmf) defined as: <span class="math display">\[f(y) = P(Y = y)\]</span></p>
<p>and has the following properties</p>
<ol type="1">
<li><span class="math inline">\(0 \leq f(y) \leq 1\;\; \forall y\)</span></li>
<li><span class="math inline">\(\sum_{\forall y}f(y) = 1\)</span></li>
</ol>
</div>
</div>
<div class="callout-caution callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
In Emanuel's words
</div>
</div>
<div class="callout-body-container callout-body">
<p>What does this mean? Well, it's very straightforward: a pmf is a function that takes in some value <span class="math inline">\(y\)</span> and outputs the probability that the random variable <span class="math inline">\(Y\)</span> equals <span class="math inline">\(y\)</span>.</p>
</div>
</div>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Mirror a stylesheet's data-mode attribute onto <body>: "dark" selects the
// quarto-dark class, any other value selects quarto-light. The opposite
// class is removed so only one of the two is ever present.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const bodyClasses = window.document.querySelector("body").classList;
  const [enable, disable] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  bodyClasses.add(enable);
  bodyClasses.remove(disable);
}
// Sync the body color class with the primary Bootstrap stylesheet
// (link#quarto-bootstrap). Does nothing when the page has no such link.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
}
// Run once as soon as the DOM is ready.
toggleBodyColorPrimary();
const icon = "";
// Add AnchorJS link anchors to every `.anchored` element, placed on the
// right and rendered with the `icon` string declared above.
const anchors = new window.AnchorJS();
anchors.options = { placement: 'right', icon };
anchors.add('.anchored');
// Wire every copy button to ClipboardJS. In this page each button directly
// follows the <code> element it copies, so the copy target is the button's
// previous sibling.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  target: function(trigger) {
    return trigger.previousElementSibling;
  }
});
// On a successful copy, flash a "Copied!" title and checked style for 1s.
clipboard.on('success', function(e) {
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // Only arm the feedback when it is not already showing. A second click
  // inside the 1s window would otherwise read "Copied!" as the title to
  // restore and leave the button stuck on it.
  if (!button.classList.contains('code-copy-button-checked')) {
    const originalTitle = button.getAttribute("title");
    button.classList.add('code-copy-button-checked');
    button.setAttribute("title", "Copied!");
    setTimeout(function() {
      button.setAttribute("title", originalTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
  }
  // clear code selection
  e.clearSelection();
});
// Attach a Quarto-themed tippy popup to `el`. `contentFn` is handed to tippy
// as the popup content and may return HTML (allowHTML is enabled).
function tippyHover(el, contentFn) {
  window.tippy(el, {
    allowHTML: true,
    content: contentFn,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // Mount the popup inside the reference element's parent.
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start'
  });
}
// Footnote references: hovering a reference shows the footnote's HTML.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // Prefer the explicit data attribute, falling back to the link's href.
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    if (!href) {
      // Neither attribute is present: there is no footnote to look up.
      return "";
    }
    // An absolute URL is reduced to its fragment. A bare "#id" makes the URL
    // constructor throw, in which case the value is used unchanged.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // A dangling reference yields an empty popup instead of a TypeError.
    return note ? note.innerHTML : "";
  });
}
// Citation references: hovering a citation shows the bibliography entries
// listed in the parent element's space-separated data-cites attribute.
const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citesAttr = ref.parentNode.getAttribute('data-cites');
  if (!citesAttr) {
    // No citation keys on the wrapper: nothing to show. Skipping also avoids
    // calling split() on null, which would abort the loop for every later
    // reference.
    continue;
  }
  const cites = citesAttr.split(' ');
  tippyHover(ref, function() {
    const popup = window.document.createElement('div');
    cites.forEach(function(cite) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent');
      citeDiv.classList.add('csl-entry');
      // Entries are looked up by id "ref-<key>"; unknown keys leave the
      // placeholder div empty.
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    });
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>