# bayes-rules-notes/R/ch2.R

# install.packages("bayesrules")

library(bayesrules)
library(dplyr)

# Load the `fake_news` dataset and convert it to a tibble for the sake of
# nicer printing, then take a quick look at its columns.
data("fake_news")
fake_news <- tibble::as_tibble(fake_news)

glimpse(fake_news)

# Proportion of each type of article.
# Count the rows per `type`, then divide by the size of the full data set.
# NOTE: `nrow(fake_news)` refers back to the original data frame by name;
# dividing by `sum(n)` here instead would give the total without doing so.
fake_news |>
    group_by(type) |>
    summarise(n = n()) |>
    mutate(prop = n / nrow(fake_news))

# The same table via `count()`, which is shorthand for
# `group_by()` followed by `tally()`; the total comes from `sum(n)`.
fake_news |>
    count(type) |>
    mutate(prop = n / sum(n))

# Usage of "!" in fake vs real articles.
# For each article type, compute the share of titles with / without an
# exclamation mark, then keep only the rows for fake articles.
#
# A grouped `mutate()` is used rather than a `summarise()` that returns
# several rows per group: multi-row `summarise()` results are deprecated in
# recent dplyr releases and emit a warning.
fake_news |>
    # one row per (type, title_has_excl) combination with its article count
    count(type, title_has_excl, name = "total_usage_of_excl") |>
    group_by(type) |>
    mutate(
        # share of this combination among all articles of the same type
        prop_excl_within_type = total_usage_of_excl / sum(total_usage_of_excl)
    ) |>
    ungroup() |>
    filter(type == "fake") |>
    # keep the same columns, in the same order, as the original output
    select(type, prop_excl_within_type, title_has_excl)

# Joint probability via the multiplication rule:
#   P(A and B) = P(A | B) * P(B)
# with
#   P(B)     = 0.4
#   P(A | B) = 0.2667
# (presumably B = "article is fake" and A = "title has '!'", matching the
# table computed above -- confirm against the chapter notes)
0.4 * 0.2667