more notes
This commit is contained in:
parent
cac5ac9243
commit
721d1d83d0
154
R/ch2.html
154
R/ch2.html
|
@ -243,12 +243,12 @@ Probability and Likelihood
|
|||
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> gt<span class="sc">::</span><span class="fu">cols_width</span>(<span class="fu">everything</span>() <span class="sc">~</span> <span class="fu">px</span>(<span class="dv">100</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output-display">
|
||||
|
||||
<div id="bwyculjrlm" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
|
||||
<div id="veslrdaavz" style="overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
|
||||
<style>html {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_table {
|
||||
#veslrdaavz .gt_table {
|
||||
display: table;
|
||||
border-collapse: collapse;
|
||||
margin-left: auto;
|
||||
|
@ -273,7 +273,7 @@ Probability and Likelihood
|
|||
border-left-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_heading {
|
||||
#veslrdaavz .gt_heading {
|
||||
background-color: #FFFFFF;
|
||||
text-align: center;
|
||||
border-bottom-color: #FFFFFF;
|
||||
|
@ -285,7 +285,7 @@ Probability and Likelihood
|
|||
border-right-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_title {
|
||||
#veslrdaavz .gt_title {
|
||||
color: #333333;
|
||||
font-size: 125%;
|
||||
font-weight: initial;
|
||||
|
@ -297,7 +297,7 @@ Probability and Likelihood
|
|||
border-bottom-width: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_subtitle {
|
||||
#veslrdaavz .gt_subtitle {
|
||||
color: #333333;
|
||||
font-size: 85%;
|
||||
font-weight: initial;
|
||||
|
@ -309,13 +309,13 @@ Probability and Likelihood
|
|||
border-top-width: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_bottom_border {
|
||||
#veslrdaavz .gt_bottom_border {
|
||||
border-bottom-style: solid;
|
||||
border-bottom-width: 2px;
|
||||
border-bottom-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_col_headings {
|
||||
#veslrdaavz .gt_col_headings {
|
||||
border-top-style: solid;
|
||||
border-top-width: 2px;
|
||||
border-top-color: #D3D3D3;
|
||||
|
@ -330,7 +330,7 @@ Probability and Likelihood
|
|||
border-right-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_col_heading {
|
||||
#veslrdaavz .gt_col_heading {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
font-size: 100%;
|
||||
|
@ -350,7 +350,7 @@ Probability and Likelihood
|
|||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_column_spanner_outer {
|
||||
#veslrdaavz .gt_column_spanner_outer {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
font-size: 100%;
|
||||
|
@ -362,15 +362,15 @@ Probability and Likelihood
|
|||
padding-right: 4px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_column_spanner_outer:first-child {
|
||||
#veslrdaavz .gt_column_spanner_outer:first-child {
|
||||
padding-left: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_column_spanner_outer:last-child {
|
||||
#veslrdaavz .gt_column_spanner_outer:last-child {
|
||||
padding-right: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_column_spanner {
|
||||
#veslrdaavz .gt_column_spanner {
|
||||
border-bottom-style: solid;
|
||||
border-bottom-width: 2px;
|
||||
border-bottom-color: #D3D3D3;
|
||||
|
@ -382,7 +382,7 @@ Probability and Likelihood
|
|||
width: 100%;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_group_heading {
|
||||
#veslrdaavz .gt_group_heading {
|
||||
padding-top: 8px;
|
||||
padding-bottom: 8px;
|
||||
padding-left: 5px;
|
||||
|
@ -407,7 +407,7 @@ Probability and Likelihood
|
|||
vertical-align: middle;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_empty_group_heading {
|
||||
#veslrdaavz .gt_empty_group_heading {
|
||||
padding: 0.5px;
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
|
@ -422,15 +422,15 @@ Probability and Likelihood
|
|||
vertical-align: middle;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_from_md > :first-child {
|
||||
#veslrdaavz .gt_from_md > :first-child {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_from_md > :last-child {
|
||||
#veslrdaavz .gt_from_md > :last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_row {
|
||||
#veslrdaavz .gt_row {
|
||||
padding-top: 8px;
|
||||
padding-bottom: 8px;
|
||||
padding-left: 5px;
|
||||
|
@ -449,7 +449,7 @@ Probability and Likelihood
|
|||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_stub {
|
||||
#veslrdaavz .gt_stub {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
font-size: 100%;
|
||||
|
@ -462,7 +462,7 @@ Probability and Likelihood
|
|||
padding-right: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_stub_row_group {
|
||||
#veslrdaavz .gt_stub_row_group {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
font-size: 100%;
|
||||
|
@ -476,11 +476,11 @@ Probability and Likelihood
|
|||
vertical-align: top;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_row_group_first td {
|
||||
#veslrdaavz .gt_row_group_first td {
|
||||
border-top-width: 2px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_summary_row {
|
||||
#veslrdaavz .gt_summary_row {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
text-transform: inherit;
|
||||
|
@ -490,16 +490,16 @@ Probability and Likelihood
|
|||
padding-right: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_first_summary_row {
|
||||
#veslrdaavz .gt_first_summary_row {
|
||||
border-top-style: solid;
|
||||
border-top-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_first_summary_row.thick {
|
||||
#veslrdaavz .gt_first_summary_row.thick {
|
||||
border-top-width: 2px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_last_summary_row {
|
||||
#veslrdaavz .gt_last_summary_row {
|
||||
padding-top: 8px;
|
||||
padding-bottom: 8px;
|
||||
padding-left: 5px;
|
||||
|
@ -509,7 +509,7 @@ Probability and Likelihood
|
|||
border-bottom-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_grand_summary_row {
|
||||
#veslrdaavz .gt_grand_summary_row {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
text-transform: inherit;
|
||||
|
@ -519,7 +519,7 @@ Probability and Likelihood
|
|||
padding-right: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_first_grand_summary_row {
|
||||
#veslrdaavz .gt_first_grand_summary_row {
|
||||
padding-top: 8px;
|
||||
padding-bottom: 8px;
|
||||
padding-left: 5px;
|
||||
|
@ -529,11 +529,11 @@ Probability and Likelihood
|
|||
border-top-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_striped {
|
||||
#veslrdaavz .gt_striped {
|
||||
background-color: rgba(128, 128, 128, 0.05);
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_table_body {
|
||||
#veslrdaavz .gt_table_body {
|
||||
border-top-style: solid;
|
||||
border-top-width: 2px;
|
||||
border-top-color: #D3D3D3;
|
||||
|
@ -542,7 +542,7 @@ Probability and Likelihood
|
|||
border-bottom-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_footnotes {
|
||||
#veslrdaavz .gt_footnotes {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
border-bottom-style: none;
|
||||
|
@ -556,7 +556,7 @@ Probability and Likelihood
|
|||
border-right-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_footnote {
|
||||
#veslrdaavz .gt_footnote {
|
||||
margin: 0px;
|
||||
font-size: 90%;
|
||||
padding-left: 4px;
|
||||
|
@ -565,7 +565,7 @@ Probability and Likelihood
|
|||
padding-right: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_sourcenotes {
|
||||
#veslrdaavz .gt_sourcenotes {
|
||||
color: #333333;
|
||||
background-color: #FFFFFF;
|
||||
border-bottom-style: none;
|
||||
|
@ -579,7 +579,7 @@ Probability and Likelihood
|
|||
border-right-color: #D3D3D3;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_sourcenote {
|
||||
#veslrdaavz .gt_sourcenote {
|
||||
font-size: 90%;
|
||||
padding-top: 4px;
|
||||
padding-bottom: 4px;
|
||||
|
@ -587,64 +587,64 @@ Probability and Likelihood
|
|||
padding-right: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_left {
|
||||
#veslrdaavz .gt_left {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_center {
|
||||
#veslrdaavz .gt_center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_right {
|
||||
#veslrdaavz .gt_right {
|
||||
text-align: right;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_font_normal {
|
||||
#veslrdaavz .gt_font_normal {
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_font_bold {
|
||||
#veslrdaavz .gt_font_bold {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_font_italic {
|
||||
#veslrdaavz .gt_font_italic {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_super {
|
||||
#veslrdaavz .gt_super {
|
||||
font-size: 65%;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_footnote_marks {
|
||||
#veslrdaavz .gt_footnote_marks {
|
||||
font-style: italic;
|
||||
font-weight: normal;
|
||||
font-size: 75%;
|
||||
vertical-align: 0.4em;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_asterisk {
|
||||
#veslrdaavz .gt_asterisk {
|
||||
font-size: 100%;
|
||||
vertical-align: 0;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_indent_1 {
|
||||
#veslrdaavz .gt_indent_1 {
|
||||
text-indent: 5px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_indent_2 {
|
||||
#veslrdaavz .gt_indent_2 {
|
||||
text-indent: 10px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_indent_3 {
|
||||
#veslrdaavz .gt_indent_3 {
|
||||
text-indent: 15px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_indent_4 {
|
||||
#veslrdaavz .gt_indent_4 {
|
||||
text-indent: 20px;
|
||||
}
|
||||
|
||||
#bwyculjrlm .gt_indent_5 {
|
||||
#veslrdaavz .gt_indent_5 {
|
||||
text-indent: 25px;
|
||||
}
|
||||
</style>
|
||||
|
@ -676,6 +676,68 @@ Probability and Likelihood
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>The table above also shows the likelihoods for the case when an article does not contain an exclamation point in the title. It’s really important to note that these are likelihoods, and it’s not the case that <span class="math inline">\(L(B|A) + L(B^c|A) = 1\)</span>; as a matter of fact, this sum evaluates to a number less than one. However, since we have that <span class="math inline">\(L(B|A) = .267\)</span> and <span class="math inline">\(L(B^c|A) = .022\)</span>, we have gained additional knowledge: the use of “!” in a title is more compatible with a fake news article than a real one.</p>
|
||||
<p>Up to this point we can summarize our framework as follows</p>
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>event</th>
|
||||
<th><span class="math inline">\(B\)</span></th>
|
||||
<th><span class="math inline">\(B^c\)</span></th>
|
||||
<th>Total</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td>prior</td>
|
||||
<td>.4</td>
|
||||
<td>.6</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td>likelihood</td>
|
||||
<td>.267</td>
|
||||
<td>.022</td>
|
||||
<td>.289</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Our next goal is to come up with normalizing factors in order to build our probability table:</p>
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th></th>
|
||||
<th><span class="math inline">\(B\)</span></th>
|
||||
<th><span class="math inline">\(B^c\)</span></th>
|
||||
<th>Total</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td><span class="math inline">\(A\)</span></td>
|
||||
<td>(1)</td>
|
||||
<td>(2)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td><span class="math inline">\(A^c\)</span></td>
|
||||
<td>(3)</td>
|
||||
<td>(4)</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td>Total</td>
|
||||
<td>.4</td>
|
||||
<td>.6</td>
|
||||
<td>1</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>A couple of things to note about our table: (1) + (3) = .4 and (2) + (4) = .6. (1) + (2) + (3) + (4) = 1.</p>
|
||||
<ol type="1">
|
||||
<li><p><span class="math inline">\(P(A \cap B) = P(A|B)P(B)\)</span> we know the likelihood of <span class="math inline">\(L(B|A) = P(A|B)\)</span> and we also know the prior so we insert these to get <span class="math display">\[ P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068\]</span></p></li>
|
||||
<li value="3"><p><span class="math inline">\(P(A^c \cap B) = P(A^c|B)P(B)\)</span> in this case we do know the prior <span class="math inline">\(P(B) = .4\)</span>, but we don’t directly know the value of <span class="math inline">\(P(A^c|B)\)</span>; however, we note that <span class="math inline">\(P(A|B) + P(A^c|B) = 1\)</span>, therefore we compute <span class="math inline">\(P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733\)</span> <span class="math display">\[ P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932\]</span></p></li>
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
</main>
|
||||
|
|
37
R/ch2.qmd
37
R/ch2.qmd
|
@ -114,4 +114,39 @@ prop_of_excl_within_type |>
|
|||
|
||||
The table above also shows the likelihoods for the case
|
||||
when an article does not contain an exclamation point in
|
||||
the title.
|
||||
the title as well. It's really important to note that these are likelihoods,
|
||||
and it's not the case that $L(B|A) + L(B^c|A) = 1$; as a matter of fact, this
|
||||
value evaluates to a number less than one. However, since we have that
|
||||
$L(B|A) = .267$ and $L(B^c|A) = .022$ then we have gained additional
|
||||
knowledge in knowing the use of "!" in a title is more compatible
|
||||
with a fake news article than a real one.
|
||||
|
||||
Up to this point we can summarize our framework as follows
|
||||
|
||||
| event | $B$ | $B^c$ | Total |
|
||||
|------- |-----|-------|------|
|
||||
| prior | .4 | .6 | 1 |
|
||||
| likelihood |.267 | .022 | .289 |
|
||||
|
||||
Our next goal is to come up with normalizing factors in order to build our
|
||||
probability table:
|
||||
|
||||
| | $B$| $B^c$| Total |
|
||||
|------|----|------|-------|
|
||||
|$A$ | (1)| (2) | |
|
||||
|$A^c$ | (3)| (4) | |
|
||||
|Total | .4 | .6 | 1 |
|
||||
|
||||
A couple of things to note about our table: (1) + (3) = .4 and (2) + (4) = .6.
|
||||
(1) + (2) + (3) + (4) = 1.
|
||||
|
||||
Cell (1): $P(A \cap B) = P(A|B)P(B)$ we know the likelihood of $L(B|A) = P(A|B)$ and we also
|
||||
know the prior so we insert these to get
|
||||
$$ P(A \cap B) = P(A|B)P(B) = .267 \times .4 = .1068$$
|
||||
|
||||
Cell (3): $P(A^c \cap B) = P(A^c|B)P(B)$ in this case we do know the prior $P(B) = .4$, but we
|
||||
don't directly know the value of $P(A^c|B)$, however, we note that $P(A|B) + P(A^c|B) = 1$,
|
||||
therefore we compute $P(A^c|B) = 1 - P(A|B) = 1 - .267 = .733$
|
||||
$$ P(A^c \cap B) = P(A^c|B)P(B) = .733 \times .4 = .2932$$
|
||||
|
||||
We can now confirm that $.1068 + .2932 = .4$, which matches the column total $P(B) = .4$.
|
|
@ -0,0 +1,380 @@
|
|||
% Options for packages loaded elsewhere
|
||||
\PassOptionsToPackage{unicode}{hyperref}
|
||||
\PassOptionsToPackage{hyphens}{url}
|
||||
\PassOptionsToPackage{dvipsnames,svgnames,x11names}{xcolor}
|
||||
%
|
||||
\documentclass[
|
||||
letterpaper,
|
||||
DIV=11,
|
||||
numbers=noendperiod]{scrartcl}
|
||||
|
||||
\usepackage{amsmath,amssymb}
|
||||
\usepackage{lmodern}
|
||||
\usepackage{iftex}
|
||||
\ifPDFTeX
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{textcomp} % provide euro and other symbols
|
||||
\else % if luatex or xetex
|
||||
\usepackage{unicode-math}
|
||||
\defaultfontfeatures{Scale=MatchLowercase}
|
||||
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
|
||||
\fi
|
||||
% Use upquote if available, for straight quotes in verbatim environments
|
||||
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
|
||||
\IfFileExists{microtype.sty}{% use microtype if available
|
||||
\usepackage[]{microtype}
|
||||
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
|
||||
}{}
|
||||
\makeatletter
|
||||
\@ifundefined{KOMAClassName}{% if non-KOMA class
|
||||
\IfFileExists{parskip.sty}{%
|
||||
\usepackage{parskip}
|
||||
}{% else
|
||||
\setlength{\parindent}{0pt}
|
||||
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
|
||||
}{% if KOMA class
|
||||
\KOMAoptions{parskip=half}}
|
||||
\makeatother
|
||||
\usepackage{xcolor}
|
||||
\setlength{\emergencystretch}{3em} % prevent overfull lines
|
||||
\setcounter{secnumdepth}{-\maxdimen} % remove section numbering
|
||||
% Make \paragraph and \subparagraph free-standing
|
||||
\ifx\paragraph\undefined\else
|
||||
\let\oldparagraph\paragraph
|
||||
\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
|
||||
\fi
|
||||
\ifx\subparagraph\undefined\else
|
||||
\let\oldsubparagraph\subparagraph
|
||||
\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
|
||||
\fi
|
||||
|
||||
\usepackage{color}
|
||||
\usepackage{fancyvrb}
|
||||
\newcommand{\VerbBar}{|}
|
||||
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
|
||||
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
|
||||
% Add ',fontsize=\small' for more characters per line
|
||||
\usepackage{framed}
|
||||
\definecolor{shadecolor}{RGB}{241,243,245}
|
||||
\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
|
||||
\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{#1}}
|
||||
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.40,0.45,0.13}{#1}}
|
||||
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\BuiltInTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.13,0.47,0.30}{#1}}
|
||||
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{#1}}
|
||||
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{\textit{#1}}}
|
||||
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
|
||||
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{\textit{#1}}}
|
||||
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\ExtensionTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.28,0.35,0.67}{#1}}
|
||||
\newcommand{\ImportTok}[1]{\textcolor[rgb]{0.00,0.46,0.62}{#1}}
|
||||
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{#1}}
|
||||
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\NormalTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{#1}}
|
||||
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.68,0.00,0.00}{#1}}
|
||||
\newcommand{\RegionMarkerTok}[1]{\textcolor[rgb]{0.00,0.23,0.31}{#1}}
|
||||
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{#1}}
|
||||
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.13,0.47,0.30}{#1}}
|
||||
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.13,0.47,0.30}{#1}}
|
||||
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.07,0.07,0.07}{#1}}
|
||||
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.13,0.47,0.30}{#1}}
|
||||
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.37,0.37,0.37}{\textit{#1}}}
|
||||
|
||||
\providecommand{\tightlist}{%
|
||||
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}\usepackage{longtable,booktabs,array}
|
||||
\usepackage{calc} % for calculating minipage widths
|
||||
% Correct order of tables after \paragraph or \subparagraph
|
||||
\usepackage{etoolbox}
|
||||
\makeatletter
|
||||
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
|
||||
\makeatother
|
||||
% Allow footnotes in longtable head/foot
|
||||
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
|
||||
\makesavenoteenv{longtable}
|
||||
\usepackage{graphicx}
|
||||
\makeatletter
|
||||
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
|
||||
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
|
||||
\makeatother
|
||||
% Scale images if necessary, so that they will not overflow the page
|
||||
% margins by default, and it is still possible to overwrite the defaults
|
||||
% using explicit options in \includegraphics[width, height, ...]{}
|
||||
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
|
||||
% Set default figure placement to htbp
|
||||
\makeatletter
|
||||
\def\fps@figure{htbp}
|
||||
\makeatother
|
||||
|
||||
\usepackage{amsmath}
|
||||
\usepackage{booktabs}
|
||||
\usepackage{caption}
|
||||
\usepackage{longtable}
|
||||
\KOMAoption{captions}{tableheading}
|
||||
\makeatletter
|
||||
\@ifpackageloaded{tcolorbox}{}{\usepackage[many]{tcolorbox}}
|
||||
\@ifpackageloaded{fontawesome5}{}{\usepackage{fontawesome5}}
|
||||
\definecolor{quarto-callout-color}{HTML}{909090}
|
||||
\definecolor{quarto-callout-note-color}{HTML}{0758E5}
|
||||
\definecolor{quarto-callout-important-color}{HTML}{CC1914}
|
||||
\definecolor{quarto-callout-warning-color}{HTML}{EB9113}
|
||||
\definecolor{quarto-callout-tip-color}{HTML}{00A047}
|
||||
\definecolor{quarto-callout-caution-color}{HTML}{FC5300}
|
||||
\definecolor{quarto-callout-color-frame}{HTML}{acacac}
|
||||
\definecolor{quarto-callout-note-color-frame}{HTML}{4582ec}
|
||||
\definecolor{quarto-callout-important-color-frame}{HTML}{d9534f}
|
||||
\definecolor{quarto-callout-warning-color-frame}{HTML}{f0ad4e}
|
||||
\definecolor{quarto-callout-tip-color-frame}{HTML}{02b875}
|
||||
\definecolor{quarto-callout-caution-color-frame}{HTML}{fd7e14}
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\@ifpackageloaded{caption}{}{\usepackage{caption}}
|
||||
\AtBeginDocument{%
|
||||
\ifdefined\contentsname
|
||||
\renewcommand*\contentsname{Table of contents}
|
||||
\else
|
||||
\newcommand\contentsname{Table of contents}
|
||||
\fi
|
||||
\ifdefined\listfigurename
|
||||
\renewcommand*\listfigurename{List of Figures}
|
||||
\else
|
||||
\newcommand\listfigurename{List of Figures}
|
||||
\fi
|
||||
\ifdefined\listtablename
|
||||
\renewcommand*\listtablename{List of Tables}
|
||||
\else
|
||||
\newcommand\listtablename{List of Tables}
|
||||
\fi
|
||||
\ifdefined\figurename
|
||||
\renewcommand*\figurename{Figure}
|
||||
\else
|
||||
\newcommand\figurename{Figure}
|
||||
\fi
|
||||
\ifdefined\tablename
|
||||
\renewcommand*\tablename{Table}
|
||||
\else
|
||||
\newcommand\tablename{Table}
|
||||
\fi
|
||||
}
|
||||
\@ifpackageloaded{float}{}{\usepackage{float}}
|
||||
\floatstyle{ruled}
|
||||
\@ifundefined{c@chapter}{\newfloat{codelisting}{h}{lop}}{\newfloat{codelisting}{h}{lop}[chapter]}
|
||||
\floatname{codelisting}{Listing}
|
||||
\newcommand*\listoflistings{\listof{codelisting}{List of Listings}}
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\@ifpackageloaded{caption}{}{\usepackage{caption}}
|
||||
\@ifpackageloaded{subcaption}{}{\usepackage{subcaption}}
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\@ifpackageloaded{tcolorbox}{}{\usepackage[many]{tcolorbox}}
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\@ifundefined{shadecolor}{\definecolor{shadecolor}{rgb}{.97, .97, .97}}
|
||||
\makeatother
|
||||
\makeatletter
|
||||
\makeatother
|
||||
\ifLuaTeX
|
||||
\usepackage{selnolig} % disable illegal ligatures
|
||||
\fi
|
||||
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
|
||||
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
|
||||
\urlstyle{same} % disable monospaced font for URLs
|
||||
\hypersetup{
|
||||
pdftitle={Chapter 2 Notes},
|
||||
pdfauthor={Emanuel Rodriguez},
|
||||
colorlinks=true,
|
||||
linkcolor={blue},
|
||||
filecolor={Maroon},
|
||||
citecolor={Blue},
|
||||
urlcolor={Blue},
|
||||
pdfcreator={LaTeX via pandoc}}
|
||||
|
||||
\title{Chapter 2 Notes}
|
||||
\author{Emanuel Rodriguez}
|
||||
\date{}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\ifdefined\Shaded\renewenvironment{Shaded}{\begin{tcolorbox}[interior hidden, enhanced, breakable, frame hidden, sharp corners, boxrule=0pt, borderline west={3pt}{0pt}{shadecolor}]}{\end{tcolorbox}}\fi
|
||||
|
||||
In this chapter we step through an example of ``fake'' vs ``real'' news
|
||||
to build a framework to determine the probability of real vs fake of a
|
||||
new news article titled ``The President has a secret!''
|
||||
|
||||
\begin{Shaded}
|
||||
\begin{Highlighting}[]
|
||||
\CommentTok{\# libraries}
|
||||
\FunctionTok{library}\NormalTok{(bayesrules)}
|
||||
\FunctionTok{library}\NormalTok{(dplyr)}
|
||||
\FunctionTok{library}\NormalTok{(tidyr)}
|
||||
\FunctionTok{library}\NormalTok{(gt)}
|
||||
\FunctionTok{data}\NormalTok{(fake\_news)}
|
||||
\NormalTok{fake\_news }\OtherTok{\textless{}{-}}\NormalTok{ tibble}\SpecialCharTok{::}\FunctionTok{as\_tibble}\NormalTok{(fake\_news)}
|
||||
\end{Highlighting}
|
||||
\end{Shaded}
|
||||
|
||||
What is the proportion of news articles that were labeled fake vs real?
|
||||
|
||||
\begin{Shaded}
|
||||
\begin{Highlighting}[]
|
||||
\NormalTok{fake\_news }\SpecialCharTok{|\textgreater{}} \FunctionTok{glimpse}\NormalTok{()}
|
||||
\end{Highlighting}
|
||||
\end{Shaded}
|
||||
|
||||
\begin{verbatim}
|
||||
Rows: 150
|
||||
Columns: 30
|
||||
$ title <chr> "Clinton's Exploited Haiti Earthquake ‘to Stea~
|
||||
$ text <chr> "0 SHARES Facebook Twitter\n\nBernard Sansaric~
|
||||
$ url <chr> "http://freedomdaily.com/former-haitian-senate~
|
||||
$ authors <chr> NA, NA, "Sierra Marlee", "Jack Shafer,Nolan D"~
|
||||
$ type <fct> fake, real, fake, real, fake, real, fake, fake~
|
||||
$ title_words <int> 17, 18, 16, 11, 9, 12, 11, 18, 10, 13, 10, 11,~
|
||||
$ text_words <int> 219, 509, 494, 268, 479, 220, 184, 500, 677, 4~
|
||||
$ title_char <int> 110, 95, 96, 60, 54, 66, 86, 104, 66, 81, 59, ~
|
||||
$ text_char <int> 1444, 3016, 2881, 1674, 2813, 1351, 1128, 3112~
|
||||
$ title_caps <int> 0, 0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0~
|
||||
$ text_caps <int> 1, 1, 3, 3, 0, 0, 0, 12, 12, 1, 2, 5, 1, 1, 6,~
|
||||
$ title_caps_percent <dbl> 0.000000, 0.000000, 6.250000, 0.000000, 0.0000~
|
||||
$ text_caps_percent <dbl> 0.4566210, 0.1964637, 0.6072874, 1.1194030, 0.~
|
||||
$ title_excl <int> 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0~
|
||||
$ text_excl <int> 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0~
|
||||
$ title_excl_percent <dbl> 0.0000000, 0.0000000, 2.0833333, 0.0000000, 0.~
|
||||
$ text_excl_percent <dbl> 0.00000000, 0.00000000, 0.06942034, 0.00000000~
|
||||
$ title_has_excl <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE~
|
||||
$ anger <dbl> 4.24, 2.28, 1.18, 4.66, 0.82, 1.29, 2.56, 3.47~
|
||||
$ anticipation <dbl> 2.12, 1.71, 2.16, 1.79, 1.23, 0.43, 2.05, 1.74~
|
||||
$ disgust <dbl> 2.54, 1.90, 0.98, 1.79, 0.41, 1.72, 2.05, 1.35~
|
||||
$ fear <dbl> 3.81, 1.90, 1.57, 4.30, 0.82, 0.43, 5.13, 4.25~
|
||||
$ joy <dbl> 1.27, 1.71, 1.96, 0.36, 1.23, 0.86, 1.54, 1.35~
|
||||
$ sadness <dbl> 4.66, 1.33, 0.78, 1.79, 0.82, 0.86, 2.05, 1.93~
|
||||
$ surprise <dbl> 2.12, 1.14, 1.18, 1.79, 0.82, 0.86, 1.03, 1.35~
|
||||
$ trust <dbl> 2.97, 4.17, 3.73, 2.51, 2.46, 2.16, 5.13, 3.86~
|
||||
$ negative <dbl> 8.47, 4.74, 3.33, 6.09, 2.66, 3.02, 4.10, 4.63~
|
||||
$ positive <dbl> 3.81, 4.93, 5.49, 2.15, 4.30, 2.16, 4.10, 4.25~
|
||||
$ text_syllables <int> 395, 845, 806, 461, 761, 376, 326, 891, 1133, ~
|
||||
$ text_syllables_per_word <dbl> 1.803653, 1.660118, 1.631579, 1.720149, 1.5887~
|
||||
\end{verbatim}
|
||||
|
||||
\begin{Shaded}
|
||||
\begin{Highlighting}[]
|
||||
\NormalTok{fake\_news }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{group\_by}\NormalTok{(type) }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{summarise}\NormalTok{(}
|
||||
\AttributeTok{total =} \FunctionTok{n}\NormalTok{(),}
|
||||
\AttributeTok{prop =}\NormalTok{ total }\SpecialCharTok{/} \FunctionTok{nrow}\NormalTok{(fake\_news)}
|
||||
\NormalTok{ ) }
|
||||
\end{Highlighting}
|
||||
\end{Shaded}
|
||||
|
||||
\begin{verbatim}
|
||||
# A tibble: 2 x 3
|
||||
type total prop
|
||||
<fct> <int> <dbl>
|
||||
1 fake 60 0.4
|
||||
2 real 90 0.6
|
||||
\end{verbatim}
|
||||
|
||||
If we let \(B\) be the event that a news article is ``fake'' news, and
|
||||
\(B^c\) be the event that a news article is ``real'', we can write the
|
||||
following:
|
||||
|
||||
\[P(B) = .4\] \[P(B^c) = .6\]
|
||||
|
||||
This is the first ``clue'' or set of data that we have to build into our
|
||||
framework. Namely, the majority of articles are ``real'', therefore we could
|
||||
simply predict that the new article is ``real''. This updated sense of
|
||||
reality now becomes our priors.
|
||||
|
||||
Next, we gather additional data and update our priors based on this new
|
||||
data. The new observation we make is the use of exclamation marks ``!''.
|
||||
We note that the use of ``!'' is more frequent in news articles labeled
|
||||
as ``fake''. We will want to incorporate this into our framework to
|
||||
decide whether the new incoming article should be labelled as real or fake.
|
||||
|
||||
\hypertarget{likelihood}{%
|
||||
\subsubsection{Likelihood}\label{likelihood}}
|
||||
|
||||
\begin{tcolorbox}[enhanced jigsaw, coltitle=black, rightrule=.15mm, bottomtitle=1mm, breakable, colframe=quarto-callout-note-color-frame, toprule=.15mm, colback=white, opacityback=0, title=\textcolor{quarto-callout-note-color}{\faInfo}\hspace{0.5em}{Probability and Likelihood}, bottomrule=.15mm, opacitybacktitle=0.6, toptitle=1mm, titlerule=0mm, arc=.35mm, leftrule=.75mm, left=2mm, colbacktitle=quarto-callout-note-color!10!white]
|
||||
When the event \(B\) is known, then we can evaluate the uncertainty of
|
||||
events \(A\) and \(A^c\) given \(B\)
|
||||
|
||||
\[P(A|B) \text{ vs } P(A^c|B)\]
|
||||
|
||||
If on the other hand, we know event \(A\) then we can evaluate the
|
||||
relative compatibility of data \(A\) with \(B\) and \(B^c\) using
|
||||
likelihood functions
|
||||
|
||||
\[L(B|A) \text{ vs } L(B^c|A)\] \[=P(A|B) \text{ vs } P(A|B^c)\]
|
||||
\end{tcolorbox}
|
||||
|
||||
So in our case, we don't know whether this new incoming article is real
|
||||
or not, but we do know that the title has an exclamation mark. This
|
||||
means we can evaluate how likely this article is real or not given that
|
||||
it contains an ``!'' in the title using likelihood functions. We can
|
||||
formulate this as:
|
||||
|
||||
\[L(B|A) \text{ vs } L(B^c|A)\]
|
||||
|
||||
And perform the computation in R as follows:
|
||||
|
||||
\begin{Shaded}
|
||||
\begin{Highlighting}[]
|
||||
\CommentTok{\# if fake, what are the proprotions of ! vs no{-}!}
|
||||
\NormalTok{prop\_of\_excl\_within\_type }\OtherTok{\textless{}{-}}\NormalTok{ fake\_news }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{group\_by}\NormalTok{(type, title\_has\_excl) }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{summarise}\NormalTok{(}
|
||||
\AttributeTok{total =} \FunctionTok{n}\NormalTok{()}
|
||||
\NormalTok{ ) }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{ungroup}\NormalTok{() }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{group\_by}\NormalTok{(type) }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{summarise}\NormalTok{(}
|
||||
\AttributeTok{has\_excl =}\NormalTok{ title\_has\_excl,}
|
||||
\AttributeTok{prop\_within\_type =}\NormalTok{ total }\SpecialCharTok{/} \FunctionTok{sum}\NormalTok{(total)}
|
||||
\NormalTok{ ) }
|
||||
\end{Highlighting}
|
||||
\end{Shaded}
|
||||
|
||||
\begin{Shaded}
|
||||
\begin{Highlighting}[]
|
||||
\NormalTok{prop\_of\_excl\_within\_type }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{pivot\_wider}\NormalTok{(}\AttributeTok{names\_from =} \StringTok{"type"}\NormalTok{, }\AttributeTok{values\_from =}\NormalTok{ prop\_within\_type) }\SpecialCharTok{|\textgreater{}}
|
||||
\FunctionTok{gt}\NormalTok{() }\SpecialCharTok{|\textgreater{}}
|
||||
\NormalTok{ gt}\SpecialCharTok{::}\FunctionTok{cols\_label}\NormalTok{(}
|
||||
\AttributeTok{has\_excl =} \StringTok{"Contains Exclamtion"}\NormalTok{,}
|
||||
\AttributeTok{fake =} \StringTok{"Fake"}\NormalTok{, }
|
||||
\AttributeTok{real =} \StringTok{"Real"}\NormalTok{) }\SpecialCharTok{|\textgreater{}}
|
||||
\NormalTok{ gt}\SpecialCharTok{::}\FunctionTok{fmt\_number}\NormalTok{(}\AttributeTok{columns=}\FunctionTok{c}\NormalTok{(}\StringTok{"fake"}\NormalTok{, }\StringTok{"real"}\NormalTok{), }\AttributeTok{decimals =} \DecValTok{3}\NormalTok{) }\SpecialCharTok{|\textgreater{}}
|
||||
\NormalTok{ gt}\SpecialCharTok{::}\FunctionTok{cols\_width}\NormalTok{(}\FunctionTok{everything}\NormalTok{() }\SpecialCharTok{\textasciitilde{}} \FunctionTok{px}\NormalTok{(}\DecValTok{100}\NormalTok{))}
|
||||
\end{Highlighting}
|
||||
\end{Shaded}
|
||||
|
||||
\begin{longtable}{crr}
|
||||
\toprule
|
||||
Contains Exclamtion & Fake & Real \\
|
||||
\midrule
|
||||
FALSE & $0.733$ & $0.978$ \\
|
||||
TRUE & $0.267$ & $0.022$ \\
|
||||
\bottomrule
|
||||
\end{longtable}
|
||||
|
||||
The table above also shows the likelihoods for the case when an article
|
||||
does not contain an exclamation point in the title.
|
||||
|
||||
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue