## ----setup, include=FALSE-----------------------------------------------------
# del /s /a:h desktop.ini
vignette_input <- knitr::current_input(dir = TRUE)
vignette_dir <- if (is.null(vignette_input)) getwd() else dirname(vignette_input)
fig_dir <- tempdir()

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = file.path(fig_dir, "getting-started-")
)

library(cosmic)
library(knitr)
library(kableExtra)
library(ggplot2)
library(MASS)

## ----load-seattle-------------------------------------------------------------
path <- system.file("extdata", "dataSPD.RData", package = "cosmic")
if (!nzchar(path)) {
  path_candidates <- c(
    file.path(vignette_dir, "..", "inst", "extdata", "dataSPD.RData"),
    file.path("..", "inst", "extdata", "dataSPD.RData"),
    file.path("inst", "extdata", "dataSPD.RData")
  )
  path <- path_candidates[file.exists(path_candidates)][1]
}

if (is.na(path) || !nzchar(path)) {
  stop("Could not find 'dataSPD.RData' in the installed package or source tree.")
}

load(path)

## ----seattle-overview---------------------------------------------------------
seattle_overview <- data.frame(
  quantity = c("Officer-incident rows", "Incidents", "Officers", "Force levels"),
  value = c(nrow(d),
            length(unique(d$id)),
            length(unique(d$idOff)),
            length(unique(d$y))))
seattle_overview

## ----force-distribution-------------------------------------------------------
force_distribution <- data.frame(
  force_level = as.integer(names(table(d$y))),
  count = as.integer(table(d$y)))
force_distribution

## ----incident-size------------------------------------------------------------
officers_per_incident <- table(table(d$id))
incident_size_distribution <- data.frame(
  officers_in_incident = as.integer(names(officers_per_incident)),
  number_of_incidents = as.integer(officers_per_incident)
)
incident_size_distribution

## ----install-cmdstanr, eval = FALSE-------------------------------------------
# install.packages("cmdstanr",
#                  repos = c("https://stan-dev.r-universe.dev",
#                            getOption("repos")))
# cmdstanr::install_cmdstan()

## ----fit-seattle, eval = FALSE------------------------------------------------
# # 5-6 hours?
# fit <- cosmic(d,
#               incidentID = id,
#               officerID  = idOff,
#               y          = y,
#               iter       = 10000,
#               chains     = 4,
#               cores      = 1,
#               threads    = 8)

## ----loadPrecompute, echo=FALSE, results='hide'-------------------------------

if(FALSE)
{
  SPDdiag <- diagnose(fit)
  
  future::plan(future::multisession, workers = 4)
  flag_officer_tail_pct <- 0.05
  off_summary <- officer_summary(fit,
                                 pct_threshold = 0.25,
                                 pct_tail = flag_officer_tail_pct)
  SPDoff_summary <- off_summary
  
  SPDdraws <- posterior(fit, pars="sDelta")
  save(SPDdiag, SPDoff_summary, SPDdraws, file="vignettes/SPDprecompute.RData")
}

# load pre-computed results
fit_path_candidates <- c(file.path(vignette_dir, "SPDprecompute.RData"),
                         "SPDprecompute.RData",
                         file.path("vignettes", "SPDprecompute.RData"))
fit_path <- fit_path_candidates[file.exists(fit_path_candidates)][1]

if (is.na(fit_path)) {
  stop("Could not find 'SPDprecompute.RData' next to the vignette source.")
}

load(fit_path)

## ----diagnose-fit, eval = FALSE-----------------------------------------------
# diagnose(fit)

## ----echo = FALSE-------------------------------------------------------------
# taken from pre-computed results
print(SPDdiag)

## ----officer-summary, eval = FALSE--------------------------------------------
# flag_officer_tail_pct <- 0.05
# off_summary <- officer_summary(fit,
#                                pct_threshold = 0.25,
#                                pct_tail = flag_officer_tail_pct)
# 
# head(off_summary)

## ----echo = FALSE-------------------------------------------------------------
# taken from pre-computed results
flag_officer_tail_pct <- 0.05
head(SPDoff_summary)

## ----outlier-report, eval = FALSE---------------------------------------------
# outliers <- outlier_report(off_summary, prob_outlier = 0.9)
# head(outliers, 10)

## ----echo = FALSE-------------------------------------------------------------
# taken from pre-computed results
outliers <- outlier_report(SPDoff_summary, prob_outlier = 0.9)
head(outliers, 10)

## -----------------------------------------------------------------------------
  outliers_display <- outliers
  prob_cols <- names(outliers_display) %in% c("pRankToppct", "pRankBotpct")
  tail_pct_label <- formatC(100 * flag_officer_tail_pct,
                            format = "fg", digits = 3)
  names(outliers_display)[names(outliers_display) == "pRankToppct"] <-
    paste0("Prob. top ", tail_pct_label, "%")
  names(outliers_display)[names(outliers_display) == "pRankBotpct"] <-
    paste0("Prob. bottom ", tail_pct_label, "%")
  outliers_display[prob_cols] <-
    lapply(outliers_display[prob_cols],
           function(x) sprintf("%.2f", x))

  digits <- rep(0, ncol(outliers_display))
  digits[seq.int(length(digits)-4,length(digits))] <- 2L

  knitr::kable(
    outliers_display,
    format = "html",
    digits = digits,
    caption = "Officers with strong posterior evidence of being in the tails of the peer distribution",
    align = rep("r", ncol(outliers_display))) |>
    kableExtra::kable_styling(
      bootstrap_options = c("striped", "hover", "condensed", "responsive"),
      full_width = FALSE,
      position = "center") |>
    kableExtra::row_spec(0, bold = TRUE, color = "white",
                         background = "#2C3E50") |>
    kableExtra::column_spec(1, bold = TRUE) |>
    kableExtra::scroll_box(width = "100%", height = "320px")

## ----posterior-draws, eval = FALSE--------------------------------------------
# sDelta_draws <- posterior(fit, pars = "sDelta")$sDelta

## ----posterior-draws-precompute, echo = FALSE---------------------------------
sDelta_draws <- SPDdraws$sDelta

## -----------------------------------------------------------------------------
score_summary <- data.frame(
  parameter = c("s[2]", "s[3]"),
  mean = c(
    mean(1 + sDelta_draws[, 1]),
    mean(1 + sDelta_draws[, 1] + sDelta_draws[, 2])
  ),
  lower_95 = c(
    unname(stats::quantile(1 + sDelta_draws[, 1], probs = 0.025)),
    unname(stats::quantile(1 + sDelta_draws[, 1] + sDelta_draws[, 2],
                           probs = 0.025))
  ),
  upper_95 = c(
    unname(stats::quantile(1 + sDelta_draws[, 1], probs = 0.975)),
    unname(stats::quantile(1 + sDelta_draws[, 1] + sDelta_draws[, 2],
                           probs = 0.975))
  )
)

## ----posterior-draw-summary, echo = FALSE-------------------------------------
knitr::kable(
  score_summary,
  digits = 3,
  caption = "Posterior summaries for the estimated score parameters"
)

## ----posterior-joint-plot, fig.width=10, fig.height=6.5, out.width='100%', fig.align='center'----
ggplot(data.frame(x = sDelta_draws[,1], 
                  y = sDelta_draws[,2]),
       aes(x=x, y=y)) +
  geom_point(alpha = 0.05, size = 0.5) +
  geom_density_2d(color = "red") +
  labs(x = expression(s[2] - 1),
       y = expression(s[3] - s[2])) +
  theme_minimal()

## ----posterior-s2-hist, fig.width=10, fig.height=5.5, out.width='100%', fig.align='center'----
hist(1+sDelta_draws[,1], xlab=expression(s[2]), main="")
abline(v=1+mean(sDelta_draws[,1]))

## ----posterior-s3-hist, fig.width=10, fig.height=5.5, out.width='100%', fig.align='center'----
hist(1+sDelta_draws[,1]+sDelta_draws[,2], 
     xlab=expression(s[3]), main="")
abline(v=1+mean(sDelta_draws[,1])+mean(sDelta_draws[,2]))

