start qmd doc for this:

This commit is contained in:
2022-09-03 23:50:30 -07:00
parent 601bfc9411
commit f9340fd7aa
14 changed files with 3039 additions and 6 deletions

301
R/ch2.html Normal file
View File

@@ -0,0 +1,301 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.1.179">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Emanuel Rodriguez">
<title>Chapter 2 Notes</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="ch2_files/libs/clipboard/clipboard.min.js"></script>
<script src="ch2_files/libs/quarto-html/quarto.js"></script>
<script src="ch2_files/libs/quarto-html/popper.min.js"></script>
<script src="ch2_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="ch2_files/libs/quarto-html/anchor.min.js"></script>
<link href="ch2_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="ch2_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="ch2_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="ch2_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="ch2_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Chapter 2 Notes</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Emanuel Rodriguez </p>
</div>
</div>
</div>
</header>
<p>In this chapter we step through an example of “fake” vs “real” news to build a framework to determine the probability of real vs fake of a new news article titled “The President has a secret!”</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># libraries</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(bayesrules)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(fake_news)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>fake_news <span class="ot">&lt;-</span> tibble<span class="sc">::</span><span class="fu">as_tibble</span>(fake_news)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What is the proportion of news articles that were labeled fake vs real.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span> <span class="fu">glimpse</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Rows: 150
Columns: 30
$ title &lt;chr&gt; "Clinton's Exploited Haiti Earthquake to Stea…
$ text &lt;chr&gt; "0 SHARES Facebook Twitter\n\nBernard Sansaric…
$ url &lt;chr&gt; "http://freedomdaily.com/former-haitian-senate…
$ authors &lt;chr&gt; NA, NA, "Sierra Marlee", "Jack Shafer,Nolan D"…
$ type &lt;fct&gt; fake, real, fake, real, fake, real, fake, fake…
$ title_words &lt;int&gt; 17, 18, 16, 11, 9, 12, 11, 18, 10, 13, 10, 11,…
$ text_words &lt;int&gt; 219, 509, 494, 268, 479, 220, 184, 500, 677, 4…
$ title_char &lt;int&gt; 110, 95, 96, 60, 54, 66, 86, 104, 66, 81, 59, …
$ text_char &lt;int&gt; 1444, 3016, 2881, 1674, 2813, 1351, 1128, 3112…
$ title_caps &lt;int&gt; 0, 0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0…
$ text_caps &lt;int&gt; 1, 1, 3, 3, 0, 0, 0, 12, 12, 1, 2, 5, 1, 1, 6,…
$ title_caps_percent &lt;dbl&gt; 0.000000, 0.000000, 6.250000, 0.000000, 0.0000…
$ text_caps_percent &lt;dbl&gt; 0.4566210, 0.1964637, 0.6072874, 1.1194030, 0.…
$ title_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ text_excl &lt;int&gt; 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0…
$ title_excl_percent &lt;dbl&gt; 0.0000000, 0.0000000, 2.0833333, 0.0000000, 0.…
$ text_excl_percent &lt;dbl&gt; 0.00000000, 0.00000000, 0.06942034, 0.00000000…
$ title_has_excl &lt;lgl&gt; FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE…
$ anger &lt;dbl&gt; 4.24, 2.28, 1.18, 4.66, 0.82, 1.29, 2.56, 3.47…
$ anticipation &lt;dbl&gt; 2.12, 1.71, 2.16, 1.79, 1.23, 0.43, 2.05, 1.74…
$ disgust &lt;dbl&gt; 2.54, 1.90, 0.98, 1.79, 0.41, 1.72, 2.05, 1.35…
$ fear &lt;dbl&gt; 3.81, 1.90, 1.57, 4.30, 0.82, 0.43, 5.13, 4.25…
$ joy &lt;dbl&gt; 1.27, 1.71, 1.96, 0.36, 1.23, 0.86, 1.54, 1.35…
$ sadness &lt;dbl&gt; 4.66, 1.33, 0.78, 1.79, 0.82, 0.86, 2.05, 1.93…
$ surprise &lt;dbl&gt; 2.12, 1.14, 1.18, 1.79, 0.82, 0.86, 1.03, 1.35…
$ trust &lt;dbl&gt; 2.97, 4.17, 3.73, 2.51, 2.46, 2.16, 5.13, 3.86…
$ negative &lt;dbl&gt; 8.47, 4.74, 3.33, 6.09, 2.66, 3.02, 4.10, 4.63…
$ positive &lt;dbl&gt; 3.81, 4.93, 5.49, 2.15, 4.30, 2.16, 4.10, 4.25…
$ text_syllables &lt;int&gt; 395, 845, 806, 461, 761, 376, 326, 891, 1133, …
$ text_syllables_per_word &lt;dbl&gt; 1.803653, 1.660118, 1.631579, 1.720149, 1.5887…</code></pre>
</div>
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>fake_news <span class="sc">|&gt;</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(type) <span class="sc">|&gt;</span> </span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarise</span>(</span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="at">total =</span> <span class="fu">n</span>(),</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="at">prop =</span> total <span class="sc">/</span> <span class="fu">nrow</span>(fake_news)</span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> ) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 2 × 3
type total prop
&lt;fct&gt; &lt;int&gt; &lt;dbl&gt;
1 fake 60 0.4
2 real 90 0.6</code></pre>
</div>
</div>
<p>If we let <span class="math inline">\(B\)</span> be the event that a news article is “fake” news, and <span class="math inline">\(B^c\)</span> be the event that a news article is “real”, we can write the following:</p>
<p><span class="math display">\[P(B) = .4\]</span> <span class="math display">\[P(B^c) = .6\]</span></p>
<p>This is the first “clue” or set of data that we have to build into our framework. Namely, majority of articles are “real”, therefore we could simply predict that the new article is “real”. This updated sense or reality now becomes our priors.</p>
<p>Getting additional data, and updating our priors, based on additional data.</p>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const clipboard = new window.ClipboardJS('.code-copy-button', {
target: function(trigger) {
return trigger.previousElementSibling;
}
});
clipboard.on('success', function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
setTimeout(function() {
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
});
function tippyHover(el, contentFn) {
const config = {
allowHTML: true,
content: contentFn,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start'
};
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
return note.innerHTML;
});
}
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const cites = ref.parentNode.getAttribute('data-cites').split(' ');
tippyHover(ref, function() {
var popup = window.document.createElement('div');
cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
});
</script>
</div> <!-- /content -->
</body></html>