47 lines
1.0 KiB
R
47 lines
1.0 KiB
R
# install.packages("bayesrules")
|
|
|
|
library(bayesrules)
|
|
library(dplyr)
|
|
|
|
# Bring the fake_news data set into the workspace.
data("fake_news")

# Convert it to a tibble for the sake of nicer printing.
fake_news <- tibble::as_tibble(fake_news)

# Quick overview of the columns and their types.
glimpse(fake_news)
|
|
|
|
# Number of articles of each type and their share of all articles.
# The denominator names the original data frame explicitly. Open question
# from the author: is there an NSE way to get the total row count of the
# original data frame without repeating its name? (Once the counts are
# ungrouped, sum(n) gives the same total.)
fake_news |>
  count(type) |>
  mutate(prop = n / nrow(fake_news))
|
|
|
|
# Same table built with tally(): count the rows per type, then express
# each count as a fraction of the overall total.
fake_news |>
  group_by(type) |>
  tally() |>
  mutate(prop = proportions(n))
|
|
|
|
# Usage of "!" in fake vs real articles.
# Step 1: count articles for every (type, title_has_excl) combination.
# Step 2: within each type, turn those counts into proportions.
# Step 3: keep only the rows for fake articles.
#
# A grouped mutate() is used for step 2 because it keeps one row per
# (type, title_has_excl) pair; summarise() returning several rows per group
# is deprecated since dplyr 1.1.0. The count column is kept alongside the
# proportion.
fake_news |>
  count(type, title_has_excl, name = "total_usage_of_excl") |>
  group_by(type) |>
  mutate(
    prop_excl_within_type = total_usage_of_excl / sum(total_usage_of_excl)
  ) |>
  ungroup() |>
  filter(type == "fake")
|
|
|
|
# Joint probability via the multiplication rule:
#   P(A and B) = P(A | B) * P(B)
# with
#   P(B)     = 0.4
#   P(A | B) = 0.2667
# NOTE(review): presumably B = "article is fake" and A = "title has '!'",
# with both values read off the tables computed earlier in this script;
# confirm.
0.4 * 0.2667
|