bayes-rules-notes/R/ch2.html

896 lines
34 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.189">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Emanuel Rodriguez">
<title>Chapter 2 Notes</title>
<style>
/* Base helpers: code wrapping, small caps, flex columns, hanging indents, task lists. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
/* Highlighted source blocks: styles for pre > code.sourceCode and its direct child spans. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
/* Empty spans get an explicit height. */
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* On screen the block scrolls on overflow. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, lines may wrap; the negative text-indent plus equal padding gives a hanging indent. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbers for pre.numberSource: a "source-line" counter is reset on the code element,
   incremented on each child span, and printed before the span's first anchor.
   The printed number is excluded from text selection. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Empty rule: declares nothing. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Token colours, one rule per highlighter class (the trailing comment names the token kind). */
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="ch2_files/libs/clipboard/clipboard.min.js"></script>
<script src="ch2_files/libs/quarto-html/quarto.js"></script>
<script src="ch2_files/libs/quarto-html/popper.min.js"></script>
<script src="ch2_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="ch2_files/libs/quarto-html/anchor.min.js"></script>
<link href="ch2_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="ch2_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="ch2_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="ch2_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="ch2_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<link rel="stylesheet" href="styles.css">
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Chapter 2 Notes</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Emanuel Rodriguez </p>
</div>
</div>
</div>
</header>
<p>In this chapter we step through an example of “fake” vs “real” news to build a framework for determining the probability that a new news article titled “The President has a secret!” is real vs fake.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># libraries</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(bayesrules)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyr)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(gt)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(fake_news)</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>fake_news <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">as_tibble</span>(fake_news)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What is the proportion of news articles that were labeled fake vs real?</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span> <span class="fu">glimpse</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 150
Columns: 30
$ title &lt;chr&gt; "Clinton's Exploited Haiti Earthquake to Stea…
$ text &lt;chr&gt; "0 SHARES Facebook Twitter\n\nBernard Sansaric…
$ url &lt;chr&gt; "http://freedomdaily.com/former-haitian-senate…
$ authors &lt;chr&gt; NA, NA, "Sierra Marlee", "Jack Shafer,Nolan D"…
$ type &lt;fct&gt; fake, real, fake, real, fake, real, fake, fake…
$ title_words &lt;int&gt; 17, 18, 16, 11, 9, 12, 11, 18, 10, 13, 10, 11,…
$ text_words &lt;int&gt; 219, 509, 494, 268, 479, 220, 184, 500, 677, 4…
$ title_char &lt;int&gt; 110, 95, 96, 60, 54, 66, 86, 104, 66, 81, 59, …
$ text_char &lt;int&gt; 1444, 3016, 2881, 1674, 2813, 1351, 1128, 3112…
$ title_caps &lt;int&gt; 0, 0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0…
$ text_caps &lt;int&gt; 1, 1, 3, 3, 0, 0, 0, 12, 12, 1, 2, 5, 1, 1, 6,…
$ title_caps_percent &lt;dbl&gt; 0.000000, 0.000000, 6.250000, 0.000000, 0.0000…
$ text_caps_percent &lt;dbl&gt; 0.4566210, 0.1964637, 0.6072874, 1.1194030, 0.…
$ title_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ text_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0…
$ title_excl_percent &lt;dbl&gt; 0.0000000, 0.0000000, 2.0833333, 0.0000000, 0.…
$ text_excl_percent &lt;dbl&gt; 0.00000000, 0.00000000, 0.06942034, 0.00000000…
$ title_has_excl &lt;lgl&gt; FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE…
$ anger &lt;dbl&gt; 4.24, 2.28, 1.18, 4.66, 0.82, 1.29, 2.56, 3.47…
$ anticipation &lt;dbl&gt; 2.12, 1.71, 2.16, 1.79, 1.23, 0.43, 2.05, 1.74…
$ disgust &lt;dbl&gt; 2.54, 1.90, 0.98, 1.79, 0.41, 1.72, 2.05, 1.35…
$ fear &lt;dbl&gt; 3.81, 1.90, 1.57, 4.30, 0.82, 0.43, 5.13, 4.25…
$ joy &lt;dbl&gt; 1.27, 1.71, 1.96, 0.36, 1.23, 0.86, 1.54, 1.35…
$ sadness &lt;dbl&gt; 4.66, 1.33, 0.78, 1.79, 0.82, 0.86, 2.05, 1.93…
$ surprise &lt;dbl&gt; 2.12, 1.14, 1.18, 1.79, 0.82, 0.86, 1.03, 1.35…
$ trust &lt;dbl&gt; 2.97, 4.17, 3.73, 2.51, 2.46, 2.16, 5.13, 3.86…
$ negative &lt;dbl&gt; 8.47, 4.74, 3.33, 6.09, 2.66, 3.02, 4.10, 4.63…
$ positive &lt;dbl&gt; 3.81, 4.93, 5.49, 2.15, 4.30, 2.16, 4.10, 4.25…
$ text_syllables &lt;int&gt; 395, 845, 806, 461, 761, 376, 326, 891, 1133, …
$ text_syllables_per_word &lt;dbl&gt; 1.803653, 1.660118, 1.631579, 1.720149, 1.5887…</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(),</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(fake_news)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 60 0.4
2 real 90 0.6</code></pre>
</div>
</div>
<p>If we let <span class="math inline">\(B\)</span> be the event that a news article is “fake” news, and <span class="math inline">\(B^c\)</span> be the event that a news article is “real”, we can write the following:</p>
<p><span class="math display">\[P(B) = .4\]</span> <span class="math display">\[P(B^c) = .6\]</span></p>
<p>This is the first “clue” or set of data that we have to build into our framework. Namely, the majority of articles are “real”, therefore we could simply predict that the new article is “real”. This updated sense of reality now becomes our priors.</p>
<p>Next, we gather additional data and update our priors based on it. The new observation we make is the use of exclamation marks “!”. We note that the use of “!” is more frequent in news articles labeled as “fake”. We will want to incorporate this into our framework to decide whether the new incoming article should be labelled as real or fake.</p>
<section id="likelihood" class="level3">
<h3 class="anchored" data-anchor-id="likelihood">Likelihood</h3>
<div class="callout-note callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
Probability and Likelihood
</div>
</div>
<div class="callout-body-container callout-body">
<p>When the event <span class="math inline">\(B\)</span> is known, then we can evaluate the uncertainty of events <span class="math inline">\(A\)</span> and <span class="math inline">\(A^c\)</span> given <span class="math inline">\(B\)</span>:</p>
<p><span class="math display">\[P(A|B) \text{ vs } P(A^c|B)\]</span></p>
<p>If, on the other hand, we know event <span class="math inline">\(A\)</span>, then we can evaluate the relative compatibility of data <span class="math inline">\(A\)</span> with <span class="math inline">\(B\)</span> and <span class="math inline">\(B^c\)</span> using likelihood functions:</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span> <span class="math display">\[=P(A|B) \text{ vs } P(A|B^c)\]</span></p>
</div>
</div>
<p>So in our case, we don’t know whether this new incoming article is real or not, but we do know that the title has an exclamation mark. This means we can evaluate how compatible an “!” in the title is with the article being fake versus real, using likelihood functions. We can formulate this as:</p>
<p><span class="math display">\[L(B|A) \text{ vs } L(B^c|A)\]</span></p>
<p>And perform the computation in R as follows:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># if fake, what are the proportions of ! vs no-!</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="ot">&lt;-</span> fake_news <span class="sc">|&gt;</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type, title_has_excl) <span class="sc">|&gt;</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>()</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a> ) <span class="sc">|&gt;</span></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ungroup</span>() <span class="sc">|&gt;</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span></span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> title_has_excl,</span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="at">prop_within_type =</span> total <span class="sc">/</span> <span class="fu">sum</span>(total)</span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>prop_of_excl_within_type <span class="sc">|&gt;</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">pivot_wider</span>(<span class="at">names_from =</span> <span class="st">"type"</span>, <span class="at">values_from =</span> prop_within_type) <span class="sc">|&gt;</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">gt</span>() <span class="sc">|&gt;</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_label</span>(</span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="at">has_excl =</span> <span class="st">"Contains Exclamtion"</span>,</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="at">fake =</span> <span class="st">"Fake"</span>, </span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="at">real =</span> <span class="st">"Real"</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">fmt_number</span>(<span class="at">columns=</span><span class="fu">c</span>(<span class="st">"fake"</span>, <span class="st">"real"</span>), <span class="at">decimals =</span> <span class="dv">3</span>) <span class="sc">|&gt;</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<div id="jtimbozsld" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
<style>html {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
}
/* gt table styles; every selector below is scoped under the #jtimbozsld wrapper. */

/* Table frame: solid top and bottom borders, no side borders. */
#jtimbozsld .gt_table {
display: table;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
color: #333333;
font-size: 16px;
font-weight: normal;
font-style: normal;
background-color: #FFFFFF;
width: auto;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #A8A8A8;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #A8A8A8;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
}
/* Heading area: title and subtitle. */
#jtimbozsld .gt_heading {
background-color: #FFFFFF;
text-align: center;
border-bottom-color: #FFFFFF;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#jtimbozsld .gt_title {
color: #333333;
font-size: 125%;
font-weight: initial;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
border-bottom-color: #FFFFFF;
border-bottom-width: 0;
}
#jtimbozsld .gt_subtitle {
color: #333333;
font-size: 85%;
font-weight: initial;
padding-top: 0;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
border-top-color: #FFFFFF;
border-top-width: 0;
}
#jtimbozsld .gt_bottom_border {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
/* Column headings and column spanners. */
#jtimbozsld .gt_col_headings {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
}
#jtimbozsld .gt_col_heading {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 6px;
padding-left: 5px;
padding-right: 5px;
overflow-x: hidden;
}
#jtimbozsld .gt_column_spanner_outer {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: normal;
text-transform: inherit;
padding-top: 0;
padding-bottom: 0;
padding-left: 4px;
padding-right: 4px;
}
#jtimbozsld .gt_column_spanner_outer:first-child {
padding-left: 0;
}
#jtimbozsld .gt_column_spanner_outer:last-child {
padding-right: 0;
}
#jtimbozsld .gt_column_spanner {
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: bottom;
padding-top: 5px;
padding-bottom: 5px;
overflow-x: hidden;
display: inline-block;
width: 100%;
}
/* Row-group headings. */
#jtimbozsld .gt_group_heading {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
}
#jtimbozsld .gt_empty_group_heading {
padding: 0.5px;
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
vertical-align: middle;
}
/* .gt_from_md content: drop the outer margins of the first and last child. */
#jtimbozsld .gt_from_md > :first-child {
margin-top: 0;
}
#jtimbozsld .gt_from_md > :last-child {
margin-bottom: 0;
}
/* Body cells. */
#jtimbozsld .gt_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
margin: 10px;
border-top-style: solid;
border-top-width: 1px;
border-top-color: #D3D3D3;
border-left-style: none;
border-left-width: 1px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 1px;
border-right-color: #D3D3D3;
vertical-align: middle;
overflow-x: hidden;
}
/* Stub cells: separated from the body by a solid right border. */
#jtimbozsld .gt_stub {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
}
#jtimbozsld .gt_stub_row_group {
color: #333333;
background-color: #FFFFFF;
font-size: 100%;
font-weight: initial;
text-transform: inherit;
border-right-style: solid;
border-right-width: 2px;
border-right-color: #D3D3D3;
padding-left: 5px;
padding-right: 5px;
vertical-align: top;
}
#jtimbozsld .gt_row_group_first td {
border-top-width: 2px;
}
/* Summary and grand-summary rows. */
#jtimbozsld .gt_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#jtimbozsld .gt_first_summary_row {
border-top-style: solid;
border-top-color: #D3D3D3;
}
#jtimbozsld .gt_first_summary_row.thick {
border-top-width: 2px;
}
#jtimbozsld .gt_last_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
#jtimbozsld .gt_grand_summary_row {
color: #333333;
background-color: #FFFFFF;
text-transform: inherit;
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
}
#jtimbozsld .gt_first_grand_summary_row {
padding-top: 8px;
padding-bottom: 8px;
padding-left: 5px;
padding-right: 5px;
border-top-style: double;
border-top-width: 6px;
border-top-color: #D3D3D3;
}
/* Striped-row tint and table-body borders. */
#jtimbozsld .gt_striped {
background-color: rgba(128, 128, 128, 0.05);
}
#jtimbozsld .gt_table_body {
border-top-style: solid;
border-top-width: 2px;
border-top-color: #D3D3D3;
border-bottom-style: solid;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
}
/* Footnotes section container. */
#jtimbozsld .gt_footnotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
/* Individual footnote: 4px vertical and 5px horizontal padding, matching .gt_sourcenote.
   The first padding pair used to repeat padding-left/right, so it was always overridden
   and no vertical padding was applied. */
#jtimbozsld .gt_footnote {
margin: 0px;
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
/* Source-notes section and individual source notes (scoped under #jtimbozsld). */
#jtimbozsld .gt_sourcenotes {
color: #333333;
background-color: #FFFFFF;
border-bottom-style: none;
border-bottom-width: 2px;
border-bottom-color: #D3D3D3;
border-left-style: none;
border-left-width: 2px;
border-left-color: #D3D3D3;
border-right-style: none;
border-right-width: 2px;
border-right-color: #D3D3D3;
}
#jtimbozsld .gt_sourcenote {
font-size: 90%;
padding-top: 4px;
padding-bottom: 4px;
padding-left: 5px;
padding-right: 5px;
}
/* Text-alignment helpers; right-aligned cells also use tabular numerals. */
#jtimbozsld .gt_left {
text-align: left;
}
#jtimbozsld .gt_center {
text-align: center;
}
#jtimbozsld .gt_right {
text-align: right;
font-variant-numeric: tabular-nums;
}
/* Font helpers. */
#jtimbozsld .gt_font_normal {
font-weight: normal;
}
#jtimbozsld .gt_font_bold {
font-weight: bold;
}
#jtimbozsld .gt_font_italic {
font-style: italic;
}
#jtimbozsld .gt_super {
font-size: 65%;
}
/* Footnote marks: small italic text raised by 0.4em. */
#jtimbozsld .gt_footnote_marks {
font-style: italic;
font-weight: normal;
font-size: 75%;
vertical-align: 0.4em;
}
#jtimbozsld .gt_asterisk {
font-size: 100%;
vertical-align: 0;
}
/* Indentation helpers: text-indent grows by 5px per level. */
#jtimbozsld .gt_indent_1 {
text-indent: 5px;
}
#jtimbozsld .gt_indent_2 {
text-indent: 10px;
}
#jtimbozsld .gt_indent_3 {
text-indent: 15px;
}
#jtimbozsld .gt_indent_4 {
text-indent: 20px;
}
#jtimbozsld .gt_indent_5 {
text-indent: 25px;
}
</style>
<table class="gt_table" style="table-layout: fixed; width: 0px">
<colgroup>
<col style="width:100px;">
<col style="width:100px;">
<col style="width:100px;">
</colgroup>
<thead class="gt_col_headings">
<tr>
<th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col">Contains Exclamtion</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Fake</th>
<th class="gt_col_heading gt_columns_bottom_border gt_right" rowspan="1" colspan="1" scope="col">Real</th>
</tr>
</thead>
<tbody class="gt_table_body">
<tr><td class="gt_row gt_center">FALSE</td>
<td class="gt_row gt_right">0.733</td>
<td class="gt_row gt_right">0.978</td></tr>
<tr><td class="gt_row gt_center">TRUE</td>
<td class="gt_row gt_right">0.267</td>
<td class="gt_row gt_right">0.022</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<p>The table above also shows the likelihoods for the case when an article does not contain an exclamation point in the title. It’s really important to note that these are likelihoods, and it’s not the case that <span class="math inline">\(L(B|A) + L(B^c|A) = 1\)</span>; as a matter of fact, this sum evaluates to a number less than one (<span class="math inline">\(.267 + .022 = .289\)</span>). However, since we have that <span class="math inline">\(L(B|A) = .267\)</span> and <span class="math inline">\(L(B^c|A) = .022\)</span>, we have gained additional knowledge: the use of “!” in a title is more compatible with a fake news article than a real one.</p>
<p>Up to this point we can summarize our framework as follows</p>
<table class="table">
<thead>
<tr class="header">
<th>event</th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>prior</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
<tr class="even">
<td>likelihood</td>
<td>.267</td>
<td>.022</td>
<td>.289</td>
</tr>
</tbody>
</table>
<p>Our next goal is to come up with normalizing factors in order to build our probability table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>(1)</td>
<td>(2)</td>
<td></td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>(3)</td>
<td>(4)</td>
<td></td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>A couple of things to note about our table: (1) + (3) = .4, (2) + (4) = .6, and (1) + (2) + (3) + (4) = 1.</p>
<p>(1.) <span class="math inline">\(P(A \cap B) = P(A|B)P(B)\)</span> we know the likelihood of <span class="math inline">\(L(B|A) = P(A|B)\)</span> and we also know the prior so we insert these to get <span class="math display">\[ P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068\]</span></p>
<p>(3.) <span class="math inline">\(P(A^c \cap B) = P(A^c|B)P(B)\)</span> in this case we do know the prior <span class="math inline">\(P(B) = .4\)</span>, but we don’t directly know the value of <span class="math inline">\(P(A^c|B)\)</span>, however, we note that <span class="math inline">\(P(A|B) + P(A^c|B) = 1\)</span>, therefore we compute <span class="math inline">\(P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733\)</span> <span class="math display">\[ P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932\]</span></p>
<p>we now can confirm that <span class="math inline">\(.1068 + .2932 = .4\)</span></p>
<p>Moving on to (2), (4)</p>
<p>(2.) <span class="math inline">\(P(A \cap B^c) = P(A|B^c)P(B^c)\)</span>. In this case we know the likelihood <span class="math inline">\(L(B^c|A) = P(A|B^c)\)</span> and we know the prior <span class="math inline">\(P(B^c)\)</span> therefore, <span class="math display">\[P(A \cap B^c) = P(A|B^c)P(B^c) = .022 \times .6 = .0132\]</span></p>
<p>(4.) <span class="math inline">\(P(A^c \cap B^c) = P(A^c|B^c)P(B^c) = (1 - .022) \times .6 = .5868\)</span></p>
<p>and can confirm that <span class="math inline">\(.0132 + .5868 = .6\)</span></p>
<p>and we can fill the rest of the table:</p>
<table class="table">
<thead>
<tr class="header">
<th></th>
<th><span class="math inline">\(B\)</span></th>
<th><span class="math inline">\(B^c\)</span></th>
<th>Total</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td><span class="math inline">\(A\)</span></td>
<td>.1068</td>
<td>.0132</td>
<td>.12</td>
</tr>
<tr class="even">
<td><span class="math inline">\(A^c\)</span></td>
<td>.2932</td>
<td>.5868</td>
<td>.88</td>
</tr>
<tr class="odd">
<td>Total</td>
<td>.4</td>
<td>.6</td>
<td>1</td>
</tr>
</tbody>
</table>
<p>An important concept we implemented above is the idea of <strong>total probability</strong></p>
<div class="callout-tip callout callout-style-default no-icon callout-captioned">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon no-icon"></i>
</div>
<div class="callout-caption-container flex-fill">
total probability
</div>
</div>
<div class="callout-body-container callout-body">
<p>The <strong>total probability</strong> of observing a real article is made up of the sum of its parts. Namely</p>
<p><span class="math display">\[P(B^c) = P(A \cap B^c) + P(A^c \cap B^c)\]</span> <span class="math display">\[=P(A|B^c)P(B^c) + P(A^c|B^c)P(B^c)\]</span> <span class="math display">\[=.0132 + .5868 = .6\]</span></p>
</div>
</div>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
// Page setup that runs once the DOM has been parsed: mirrors the Bootstrap
// stylesheet's colour mode onto <body>, adds AnchorJS anchors to ".anchored"
// elements, wires the code-copy buttons to ClipboardJS, and attaches tippy
// hover popups to footnote and citation references.
window.document.addEventListener("DOMContentLoaded", function (event) {
  // Set quarto-dark / quarto-light on <body> from the sheet's data-mode attribute.
  const toggleBodyColorMode = (bsSheetEl) => {
    const mode = bsSheetEl.getAttribute("data-mode");
    const bodyEl = window.document.querySelector("body");
    if (mode === "dark") {
      bodyEl.classList.add("quarto-dark");
      bodyEl.classList.remove("quarto-light");
    } else {
      bodyEl.classList.add("quarto-light");
      bodyEl.classList.remove("quarto-dark");
    }
  }
  // Apply the colour mode of the link#quarto-bootstrap sheet, when one is present.
  const toggleBodyColorPrimary = () => {
    const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
    if (bsSheetEl) {
      toggleBodyColorMode(bsSheetEl);
    }
  }
  toggleBodyColorPrimary();

  // Heading anchors.
  // NOTE(review): the icon string is empty as it appears here; confirm no glyph was lost.
  const icon = "";
  const anchorJS = new window.AnchorJS();
  anchorJS.options = {
    placement: 'right',
    icon: icon
  };
  anchorJS.add('.anchored');

  // Copy buttons: the text to copy is the element immediately before the button.
  const clipboard = new window.ClipboardJS('.code-copy-button', {
    target: function(trigger) {
      return trigger.previousElementSibling;
    }
  });
  clipboard.on('success', function(e) {
    // button target
    const button = e.trigger;
    // don't keep focus
    button.blur();
    // flash "checked"
    button.classList.add('code-copy-button-checked');
    var currentTitle = button.getAttribute("title");
    button.setAttribute("title", "Copied!");
    // restore the original title and state after one second
    setTimeout(function() {
      button.setAttribute("title", currentTitle);
      button.classList.remove('code-copy-button-checked');
    }, 1000);
    // clear code selection
    e.clearSelection();
  });

  // Attach a tippy popup to `el`; `contentFn` supplies the popup's HTML.
  function tippyHover(el, contentFn) {
    const config = {
      allowHTML: true,
      content: contentFn,
      maxWidth: 500,
      delay: 100,
      arrow: false,
      appendTo: function(el) {
        return el.parentElement;
      },
      interactive: true,
      interactiveBorder: 10,
      theme: 'quarto',
      placement: 'bottom-start'
    };
    window.tippy(el, config);
  }

  // Footnote references: show the HTML of the referenced note.
  const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
  for (var i=0; i<noterefs.length; i++) {
    const ref = noterefs[i];
    tippyHover(ref, function() {
      // use id or data attribute instead here
      let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
      // getAttribute returns null when neither attribute exists; nothing to show then
      if (!href) {
        return "";
      }
      // a relative href is not a valid URL on its own; keep it as-is in that case
      try { href = new URL(href).hash; } catch {}
      const id = href.replace(/^#\/?/, "");
      const note = window.document.getElementById(id);
      // getElementById returns null for an unknown id; show an empty popup instead of throwing
      return note ? note.innerHTML : "";
    });
  }

  // Citation references: show the bibliography entries named in the parent's data-cites.
  var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
  for (var i=0; i<bibliorefs.length; i++) {
    const ref = bibliorefs[i];
    const citesAttr = ref.parentNode.getAttribute('data-cites');
    // without data-cites there is nothing to look up; skip this link and keep
    // wiring the remaining ones rather than aborting on a null dereference
    if (citesAttr === null) {
      continue;
    }
    const cites = citesAttr.split(' ');
    tippyHover(ref, function() {
      var popup = window.document.createElement('div');
      cites.forEach(function(cite) {
        var citeDiv = window.document.createElement('div');
        citeDiv.classList.add('hanging-indent');
        citeDiv.classList.add('csl-entry');
        var biblioDiv = window.document.getElementById('ref-' + cite);
        if (biblioDiv) {
          citeDiv.innerHTML = biblioDiv.innerHTML;
        }
        popup.appendChild(citeDiv);
      });
      return popup.innerHTML;
    });
  }
});
</script>
</div> <!-- /content -->
</body></html>