diff --git a/NAMESPACE b/NAMESPACE index 4823d8df..1752f215 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,10 @@ export(ppc_bars) export(ppc_bars_data) export(ppc_bars_grouped) export(ppc_boxplot) +export(ppc_calibration) +export(ppc_calibration_grouped) +export(ppc_calibration_overlay) +export(ppc_calibration_overlay_grouped) export(ppc_data) export(ppc_dens) export(ppc_dens_overlay) @@ -142,6 +146,8 @@ export(ppc_intervals_data) export(ppc_intervals_grouped) export(ppc_km_overlay) export(ppc_km_overlay_grouped) +export(ppc_loo_calibration) +export(ppc_loo_calibration_grouped) export(ppc_loo_intervals) export(ppc_loo_pit) export(ppc_loo_pit_data) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R new file mode 100644 index 00000000..0d42d15f --- /dev/null +++ b/R/ppc-calibration.R @@ -0,0 +1,391 @@ +#' PPC calibration +#' +#' Assess the calibration of the predictions, or predictive probabilites in relation to +#' binary observations. +#' See the **Plot Descriptions** section, below, for details. +#' +#' @name PPC-calibration +#' @family PPCs +#' +#' @template args-y-yrep +#' @template args-group +#' @param interval_type For `ppc_calibration()`, `ppc_calibration_grouped()`, +#' 'ppc_loo_calibration()', and ´ppc_loo_calibration_grouped()´, the type of +#' interval to compute. Options are '"consistency"' (default) for credible +#' intervals for the PAV-adjusted calibration curve of posterior predictive +#' sample, or `"confidence"` for the credible intervals of the calibration +#' curve of the observed binary events. +#' +#' @template return-ggplot-or-data +#' +#' @section Plot Descriptions: +#' \describe{ +#' \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{ +#' PAV-adjusted calibration plots showing the relationship between the +#' predicted event probabilities and the conditional event probabilities. +#' The `interval_type` parameter controls whether confidence intervals, or +#' consistency intervals are computed.}, +#' \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{ +#' Overlay plots showing posterior samples of PAV-adjusted calibration +#' curves. +#' }, +#' \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{ +#' PAV-adjusted calibration plots to assess the calibration of the +#' leave-one-out (LOO) predictive probabilities. +#' } +#' } +#' +#' @examples +#' color_scheme_set("brightblue") +#' +#' # Make an example dataset of binary observations +#' ymin <- range(example_y_data(), example_yrep_draws())[1] +#' ymax <- range(example_y_data(), example_yrep_draws())[2] +#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) +#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin) +#' +#' ppc_calibration_overlay(y, prep[1:50, ]) +#' +#' # Compare confidence vs consistency intervals +#' ppc_calibration(y, prep, interval_type = "confidence") +#' ppc_calibration(y, prep, interval_type = "consistency") +NULL + + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay <- function( + y, prep, ..., linewidth = 0.25, alpha = 0.5) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line( + aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay_grouped <- function( + y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep = prep, group = group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration <- function( + y, prep = NULL, yrep = NULL, prob = .95, interval_type = c("confidence", "consistency"), ..., + linewidth = 0.5, alpha = 0.7) { + check_ignored_arguments(...) + interval_type <- match.arg(interval_type) + data <- .ppc_calibration_data(y, prep, yrep, NULL, interval_type) %>% + group_by(idx) %>% + summarise( + value = mean(value), + lb = quantile(cep_intervals, .5 - .5 * prob), + ub = quantile(cep_intervals, .5 + .5 * prob), + cep = mean(cep) + ) + + ggplot(data) + + aes(value, cep) + + geom_abline(color = "black", linetype = 2) + + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth + ) + + scale_color_ppc() + + scale_fill_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_grouped <- function( + y, + prep = NULL, + yrep = NULL, + group, + prob = .95, + interval_type = c("confidence", "consistency"), + ..., + linewidth = 0.5, + alpha = 0.7) { + check_ignored_arguments(...) + interval_type <- match.arg(interval_type) + data <- .ppc_calibration_data(y, prep, yrep, group, interval_type) %>% + group_by(group, idx) %>% + summarise( + value = mean(value), + lb = quantile(cep_intervals, .5 - .5 * prob), + ub = quantile(cep_intervals, .5 + .5 * prob), + cep = mean(cep), + ) %>% + ungroup() + + ggplot(data) + + aes(value, cep) + + geom_abline(color = "black", linetype = 2) + + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + scale_fill_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_loo_calibration <- function( + y, yrep, lw, prob = .95, interval_type = c("confidence", "consistency"), ..., linewidth = 0.5, alpha = 0.7) { + check_ignored_arguments(...) + # Create LOO-predictive samples using resampling + yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL) + ppc_calibration(y, yrep = yrep_resampled, prob = prob, interval_type = interval_type, ..., linewidth = linewidth, alpha = alpha) +} + +#' @rdname PPC-calibration +#' @export +ppc_loo_calibration_grouped <- function( + y, + yrep, + group, + lw, + prob = .95, + interval_type = c("confidence", "consistency"), + ..., + linewidth = 0.5, + alpha = 0.7) { + check_ignored_arguments(...) + # Create LOO-predictive samples using resampling + yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL) + ppc_calibration_grouped( + y, + yrep = yrep_resampled, + group = group, + prob = prob, + interval_type = interval_type, + ..., + linewidth = linewidth, + alpha = alpha + ) +} + +.ppc_calibration_data <- function( + y, + prep = NULL, + yrep = NULL, + group = NULL, + interval_type = c("confidence", "consistency")) { + y <- validate_y(y) + n_obs <- length(y) + interval_type <- match.arg(interval_type) + + # Determine if we're using prep (probabilities) or yrep (predictive samples) + if (!is.null(prep)) { + predictions <- validate_predictions(prep, n_obs) + if (any(prep > 1 | prep < 0)) { + stop( + "Values of ´prep´ should be predictive probabilities between 0 and 1." + ) + } + is_probability <- TRUE + } else if (!is.null(yrep)) { + predictions <- validate_predictions(yrep, n_obs) + is_probability <- FALSE + } else { + stop("Either 'prep' or 'yrep' must be provided.") + } + + if (!is.null(group)) { + group <- validate_group(group, n_obs) + } else { + group <- rep(1, n_obs) + } + + data <- .ppd_data(predictions, group = group) + + if (interval_type == "confidence") { + if (is_probability) { + # confidence interval from predicted probabilities: + # cep = cep_intervals = monotone(y[order(ppred)]) + # i.e. posterior of the calibration curve trajectory + # data %>% + # group_by(group, rep_id) %>% + # mutate( + # ord = order(value), + # value = value[ord], + # cep_intervals = .monotone(y[ord]), + # cep = cep_intervals, + # idx = seq_len(n()) + # ) %>% + data %>% + group_by(group) %>% + group_modify(.f = pava_transform(.x, .y, y, NULL)) %>% + ungroup() %>% + select(value, cep_intervals, cep, idx, group) + } else { + ppred <- colMeans(predictions) + data %>% + group_by(group, rep_id) %>% + mutate( + idx_boot = sample(y_id, n(), replace = TRUE), + ord = order(ppred[idx_boot]), + value = ppred[idx_boot][ord], + cep_intervals = .monotone(y[idx_boot][ord]), + cep = cep_intervals, + idx = seq_len(n()), + ) %>% + ungroup() %>% + select(value, cep_intervals, cep, idx, group) + } + } else { + # Consistency intervals + if (is_probability) { + # For prep (probabilities), generate predictive samples from binomial + data %>% + group_by(group, rep_id) %>% + mutate( + # Generate predictive samples from binomial distribution + ord = order(value), + value = value[ord], + cep_intervals = .monotone(rbinom(n(), 1, value)), + cep = .monotone(y[ord]), + idx = seq_len(n()) + ) %>% + ungroup() %>% + select(value, cep, cep_intervals, idx, group) + } else { + # For yrep (predictive samples), use column averages for ordering + ppred <- colMeans(predictions) + data %>% + group_by(group, rep_id) %>% + mutate( + # Use column averages for ordering when yrep is provided + ord = order(ppred[y_id]), + cep_intervals = .monotone(value[ord]), + value = ppred[y_id][ord], + cep = .monotone(y[y_id][ord]), + idx = seq_len(n()), + ) %>% + ungroup() %>% + select(value, cep_intervals, cep, idx, group) + } + } +} + +.loo_resample_data <- function(yrep, lw, psis_object) { + lw <- .get_lw(lw, psis_object) + stopifnot(identical(dim(yrep), dim(lw))) + + # Resample each column (observation) with its corresponding weights + n_obs <- ncol(yrep) + n_draws <- nrow(yrep) + + # Initialize resampled matrix + yrep_resampled <- matrix(NA, nrow = n_draws, ncol = n_obs) + + for (i in 1:n_obs) { + # Create draws object for this observation + draws_i <- posterior::as_draws_matrix(yrep[, i, drop = FALSE]) + + # Resample using the weights for this observation + weights_i <- lw[, i] + resampled_i <- posterior::resample_draws( + draws_i, + weights = weights_i, ndraws = n_draws + ) + + # Extract the resampled values + yrep_resampled[, i] <- as.numeric(resampled_i) + } + + # Add observation names if available + if (!is.null(colnames(yrep))) { + colnames(yrep_resampled) <- colnames(yrep) + } + + yrep_resampled +} + +.monotone <- function(y) { + if (requireNamespace("monotone", quietly = TRUE)) { + monotone <- monotone::monotone + } else { + monotone <- function(y) { + stats::isoreg(y)$yf + } + } + monotone(y) +} + +pava_transform <- function(.x, .y, y, yrep, interval_type) { + if (no_prob) { + probs <- .x %>% + group_by(y_id) %>% + summarise(p = mean(yrep)) %>% + arrange(y_id) %>% + pull(p) + ord <- order(probs) + data <- .x |> + group_by(rep_id) |> + mutate(ord = ord) |> + ungroup() + } else { + data <- data |> + group_by(rep_id) |> + mutate(ord = order(value)) |> + ungroup() + } + if (interval_type == "confidence") { + data %>% + group_by(rep_id) %>% + mutate( + cep_intervals = .monotone(y[y_id][ord_v]), + value = ifelse(is.null(yrep), value[ord_v], probs[ord], + cep = cep_interval, + idx = seq_len(n()), + ) %>% + ungroup() %>% + select(value, cep_intervals, cep, idx, rep_id, y_id) + } else { + data + } +} diff --git a/man/PPC-calibration.Rd b/man/PPC-calibration.Rd new file mode 100644 index 00000000..fbd9a0c3 --- /dev/null +++ b/man/PPC-calibration.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ppc-calibration.R +\name{PPC-calibration} +\alias{PPC-calibration} +\alias{ppc_calibration_overlay} +\alias{ppc_calibration_overlay_grouped} +\alias{ppc_calibration} +\alias{ppc_calibration_grouped} +\alias{ppc_loo_calibration} +\alias{ppc_loo_calibration_grouped} +\title{Assess the calibration of the predictive distributions \code{yrep} in relation to +the data `y'. +See the \strong{Plot Descriptions} section, below, for details.} +\usage{ +ppc_calibration_overlay(y, prep, ..., linewidth = 0.25, alpha = 0.5) + +ppc_calibration_overlay_grouped( + y, + prep, + group, + ..., + linewidth = 0.25, + alpha = 0.7 +) + +ppc_calibration(y, prep, prob = 0.95, ..., linewidth = 0.5, alpha = 0.7) + +ppc_calibration_grouped( + y, + prep, + group, + prob = 0.95, + ..., + linewidth = 0.5, + alpha = 0.7 +) + +ppc_loo_calibration(y, yrep, lw, ..., linewidth = 0.25, alpha = 0.7) + +ppc_loo_calibration_grouped( + y, + yrep, + group, + lw, + ..., + linewidth = 0.25, + alpha = 0.7 +) +} +\arguments{ +\item{y}{A vector of observations. See \strong{Details}.} + +\item{group}{A grouping variable of the same length as \code{y}. +Will be coerced to \link[base:factor]{factor} if not already a factor. +Each value in \code{group} is interpreted as the group level pertaining +to the corresponding observation.} + +\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior) +predictive distribution. The number of rows, \code{S}, is the size of the +posterior (or prior) sample used to generate \code{yrep}. The number of columns, +\code{N} is the number of predicted observations (\code{length(y)}). The columns of +\code{yrep} should be in the same order as the data points in \code{y} for the plots +to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for +additional advice specific to particular plots.} +} +\value{ +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\verb{_data()} return the data that would have been drawn by the plotting +function. +} +\description{ +Assess the calibration of the predictive distributions \code{yrep} in relation to +the data `y'. +See the \strong{Plot Descriptions} section, below, for details. +} +\section{Plot Descriptions}{ + +\describe{ +\item{\code{ppc_calibration()},\code{ppc_calibration_grouped()}}{ + +}, +\item{\code{ppc_calibration_overlay()},\code{ppc_calibration_overlay_grouped()}}{ + +}, +\item{\code{ppc_loo_calibration()},\code{ppc_loo_calibration_grouped()}}{ + +} +} +} + +\examples{ +color_scheme_set("brightblue") + +# Make an example dataset of binary observations +ymin <- range(example_y_data(), example_yrep_draws())[1] +ymax <- range(example_y_data(), example_yrep_draws())[2] +y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) +prep <- (example_yrep_draws() - ymin) / (ymax - ymin) + +ppc_calibration_overlay(y, prep[1:50, ]) +} +\seealso{ +Other PPCs: +\code{\link{PPC-censoring}}, +\code{\link{PPC-discrete}}, +\code{\link{PPC-distributions}}, +\code{\link{PPC-errors}}, +\code{\link{PPC-intervals}}, +\code{\link{PPC-loo}}, +\code{\link{PPC-overview}}, +\code{\link{PPC-scatterplots}}, +\code{\link{PPC-test-statistics}} +} +\concept{PPCs} diff --git a/man/PPC-censoring.Rd b/man/PPC-censoring.Rd index ddf011b4..30a7b78d 100644 --- a/man/PPC-censoring.Rd +++ b/man/PPC-censoring.Rd @@ -150,6 +150,7 @@ doi:10.1080/01621459.1958.10501452. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd index 434ba7bd..4f2fcabd 100644 --- a/man/PPC-discrete.Rd +++ b/man/PPC-discrete.Rd @@ -216,6 +216,7 @@ Visualizing count data regressions using rootograms. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-distributions}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index 628a3ad5..991a9742 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -389,6 +389,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-errors.Rd b/man/PPC-errors.Rd index 047590bf..c3196f26 100644 --- a/man/PPC-errors.Rd +++ b/man/PPC-errors.Rd @@ -227,6 +227,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd index ddac2ebc..369ab029 100644 --- a/man/PPC-intervals.Rd +++ b/man/PPC-intervals.Rd @@ -240,6 +240,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 10f29d8c..3113fab8 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -349,6 +349,7 @@ https://www.jstor.org/stable/2986005. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-overview.Rd b/man/PPC-overview.Rd index 16d3f0ff..f58465d2 100644 --- a/man/PPC-overview.Rd +++ b/man/PPC-overview.Rd @@ -124,6 +124,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-scatterplots.Rd b/man/PPC-scatterplots.Rd index 2c22bbd3..2e274273 100644 --- a/man/PPC-scatterplots.Rd +++ b/man/PPC-scatterplots.Rd @@ -155,6 +155,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd index 83883979..c9368794 100644 --- a/man/PPC-test-statistics.Rd +++ b/man/PPC-test-statistics.Rd @@ -213,6 +213,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/tests/testthat/test-ppc-calibration.R b/tests/testthat/test-ppc-calibration.R new file mode 100644 index 00000000..1858ace5 --- /dev/null +++ b/tests/testthat/test-ppc-calibration.R @@ -0,0 +1,257 @@ +library(bayesplot) +context("PPC: calibration") + +if (!exists("expect_gg")) expect_gg <- bayesplot:::expect_gg + +# Create binary test data for calibration plots +set.seed(1234) +n_obs <- 100 +n_draws <- 50 +p_true <- runif(n_obs) +calib_y <- rbinom(n_obs, 1, p_true) +calib_prep <- matrix(pmin(1,pmax(0, p_true + rnorm(n_obs * n_draws, 0, .1))), nrow = n_draws, ncol = n_obs) +calib_yrep <- t(apply(calib_prep, 1, rbinom, n = n_obs, size = 1)) +calib_group <- gl(2, n_obs / 2, labels = c("A", "B")) +calib_lw <- matrix(runif(n_obs * n_draws), ncol = n_obs, nrow = n_draws) + +test_that("ppc_calibration_overlay returns a ggplot object", { + expect_gg(ppc_calibration_overlay(calib_y, calib_prep)) + expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1:5, ], + linewidth = 0.5, alpha = 0.3)) +}) + +test_that("ppc_calibration_overlay_grouped returns a ggplot object", { + expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group)) + expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep[1:3, ], calib_group, + linewidth = 0.5, alpha = 0.3)) +}) + +test_that("ppc_calibration returns a ggplot object", { + expect_gg(ppc_calibration(calib_y, calib_prep)) + expect_gg(ppc_calibration(calib_y, calib_yrep)) + expect_gg(ppc_calibration(calib_y, calib_prep, prob = 0.9, + linewidth = 0.8, alpha = 0.5)) + # Test new interval_type parameter + expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "confidence")) + expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "consistency")) +}) + +test_that("ppc_calibration_grouped returns a ggplot object", { + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group)) + expect_gg(ppc_calibration_grouped(calib_y, calib_yrep, calib_group)) + expect_gg(ppc_calibration_grouped(calib_y, calib_prep[1:5, ], calib_group, + prob = 0.9, linewidth = 0.8, alpha = 0.5)) + # Test new interval_type parameter + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "confidence")) + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "consistency")) +}) + +test_that("ppc_loo_calibration returns a ggplot object", { + # Note: This function now returns interval plots instead of overlay plots + # Testing basic functionality for now + expect_gg(ppc_loo_calibration(calib_y, calib_yrep, calib_prep)) + expect_gg(ppc_loo_calibration(calib_y, calib_yrep[1:3, ], calib_prep[1:3, ], + prob = 0.9, linewidth = 0.8, alpha = 0.5)) +}) + +test_that("ppc_loo_calibration_grouped returns a ggplot object", { + # Note: This function now returns interval plots instead of overlay plots + # Testing basic functionality for now + expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep, calib_group, calib_lw)) + expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep[1:3, ], calib_group, calib_lw[1:3, ], + prob = 0.9, linewidth = 0.8, alpha = 0.5)) +}) + +test_that("calibration functions handle edge cases", { + # Single observation + expect_gg(ppc_calibration_overlay(1, matrix(0.5, nrow = 1, ncol = 1))) + expect_gg(ppc_calibration(1, matrix(0.5, nrow = 1, ncol = 1))) + + # Single draw + expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1, , drop = FALSE])) + expect_gg(ppc_calibration(calib_y, calib_prep[1, , drop = FALSE])) + + # All zeros or ones + expect_gg(ppc_calibration_overlay(rep(0, 10), matrix(0.1, nrow = 5, ncol = 10))) + expect_gg(ppc_calibration_overlay(rep(1, 10), matrix(0.9, nrow = 5, ncol = 10))) +}) + +test_that("calibration functions validate inputs correctly", { + # Invalid probabilities (outside [0,1]) + expect_error(ppc_calibration_overlay(calib_y, matrix(1.5, nrow = 5, ncol = 50)), + "Values of ´prep´ should be predictive probabilities between 0 and 1") + expect_error(ppc_calibration_overlay(calib_y, matrix(-0.1, nrow = 5, ncol = 50)), + "Values of ´prep´ should be predictive probabilities between 0 and 1") + + # Mismatched dimensions + expect_error(ppc_calibration_overlay(calib_y, calib_prep[, 1:25]), + "ncol(yrep) must be equal to length(y).", fixed=TRUE) + + # Invalid group + expect_error(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group[1:25]), + "length(group) must be equal to the number of observations.", + fixed=TRUE) + + # Invalid interval_type + expect_error(ppc_calibration(calib_y, calib_prep, interval_type = "invalid"), + "should be one of") +}) + +test_that("calibration functions work with different group types", { + # Numeric groups + expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.numeric(calib_group))) + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.numeric(calib_group))) + + # Integer groups + expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.integer(calib_group))) + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.integer(calib_group))) + + # Character groups + expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.character(calib_group))) + expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.character(calib_group))) +}) + +# Visual tests ----------------------------------------------------------------- + +test_that("ppc_calibration_overlay renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_calibration_overlay(calib_y, calib_prep) + vdiffr::expect_doppelganger("ppc_calibration_overlay (default)", p_base) + + p_custom <- ppc_calibration_overlay( + calib_y, + calib_prep, + prob = .99, + linewidth = 0.5, + alpha = 0.3 + ) + vdiffr::expect_doppelganger("ppc_calibration_overlay (custom)", p_custom) +}) + +test_that("ppc_calibration_overlay_grouped renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group) + vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (default)", p_base) + + p_custom <- ppc_calibration_overlay_grouped( + calib_y, + calib_prep, + calib_group, + prob = .99, + linewidth = 0.5, + alpha = 0.3 + ) + vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (custom)", p_custom) +}) + +test_that("ppc_calibration renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_calibration(calib_y, calib_prep) + vdiffr::expect_doppelganger("ppc_calibration (default)", p_base) + + p_custom <- ppc_calibration( + calib_y, + calib_prep, + prob = 0.9, + linewidth = 0.8, + alpha = 0.5 + ) + vdiffr::expect_doppelganger("ppc_calibration (custom)", p_custom) + + # Test interval_type variants + p_confidence <- ppc_calibration( + calib_y, + calib_prep, + interval_type = "confidence" + ) + vdiffr::expect_doppelganger("ppc_calibration (confidence)", p_confidence) + + p_consistency <- ppc_calibration( + calib_y, + calib_prep, + interval_type = "consistency" + ) + vdiffr::expect_doppelganger("ppc_calibration (consistency)", p_consistency) +}) + +test_that("ppc_calibration_grouped renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_calibration_grouped(calib_y, calib_prep, calib_group) + vdiffr::expect_doppelganger("ppc_calibration_grouped (default)", p_base) + + p_custom <- ppc_calibration_grouped( + calib_y, + calib_prep, + calib_group, + prob = 0.9, + linewidth = 0.8, + alpha = 0.5 + ) + vdiffr::expect_doppelganger("ppc_calibration_grouped (custom)", p_custom) + + # Test interval_type variants + p_confidence <- ppc_calibration_grouped( + calib_y, + calib_prep, + calib_group, + interval_type = "confidence" + ) + vdiffr::expect_doppelganger("ppc_calibration_grouped (confidence)", p_confidence) + + p_consistency <- ppc_calibration_grouped( + calib_y, + calib_prep, + calib_group, + interval_type = "consistency" + ) + vdiffr::expect_doppelganger("ppc_calibration_grouped (consistency)", p_consistency) +}) + +test_that("ppc_loo_calibration renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_loo_calibration(calib_y, calib_yrep, calib_lw) + vdiffr::expect_doppelganger("ppc_loo_calibration (default)", p_base) + + p_custom <- ppc_loo_calibration( + calib_y, + calib_yrep, + calib_lw, + linewidth = 0.5, + alpha = 0.3 + ) + vdiffr::expect_doppelganger("ppc_loo_calibration (custom)", p_custom) +}) + +test_that("ppc_loo_calibration_grouped renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + skip_on_r_oldrel() + + p_base <- ppc_loo_calibration_grouped(calib_y, calib_yrep, calib_group, calib_lw) + vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (default)", p_base) + + p_custom <- ppc_loo_calibration_grouped( + calib_y, + calib_yrep, + calib_group, + calib_lw, + linewidth = 0.5, + alpha = 0.3 + ) + vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (custom)", p_custom) +})