diff --git a/NAMESPACE b/NAMESPACE
index 4823d8df..1752f215 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -119,6 +119,10 @@ export(ppc_bars)
 export(ppc_bars_data)
 export(ppc_bars_grouped)
 export(ppc_boxplot)
+export(ppc_calibration)
+export(ppc_calibration_grouped)
+export(ppc_calibration_overlay)
+export(ppc_calibration_overlay_grouped)
 export(ppc_data)
 export(ppc_dens)
 export(ppc_dens_overlay)
@@ -142,6 +146,8 @@ export(ppc_intervals_data)
 export(ppc_intervals_grouped)
 export(ppc_km_overlay)
 export(ppc_km_overlay_grouped)
+export(ppc_loo_calibration)
+export(ppc_loo_calibration_grouped)
 export(ppc_loo_intervals)
 export(ppc_loo_pit)
 export(ppc_loo_pit_data)
diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R
new file mode 100644
index 00000000..0d42d15f
--- /dev/null
+++ b/R/ppc-calibration.R
@@ -0,0 +1,391 @@
+#' PPC calibration
+#'
+#' Assess the calibration of the predictions, or predictive probabilites in relation to
+#' binary observations.
+#' See the **Plot Descriptions** section, below, for details.
+#'
+#' @name PPC-calibration
+#' @family PPCs
+#'
+#' @template args-y-yrep
+#' @template args-group
+#' @param interval_type For `ppc_calibration()`, `ppc_calibration_grouped()`,
+#'   'ppc_loo_calibration()', and ´ppc_loo_calibration_grouped()´, the type of
+#'    interval to compute. Options are '"consistency"' (default) for credible
+#'    intervals for the PAV-adjusted calibration curve of posterior predictive
+#'    sample, or `"confidence"` for the credible intervals of the calibration
+#'    curve of the observed binary events.
+#'
+#' @template return-ggplot-or-data
+#'
+#' @section Plot Descriptions:
+#' \describe{
+#'   \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{
+#'   PAV-adjusted calibration plots showing the relationship between the
+#'   predicted event probabilities and the conditional event probabilities.
+#'   The `interval_type` parameter controls whether confidence intervals, or
+#'   consistency intervals are computed.},
+#'   \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{
+#'   Overlay plots showing posterior samples of PAV-adjusted calibration
+#'   curves.
+#'   },
+#'   \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{
+#'   PAV-adjusted calibration plots to assess the calibration of the
+#'   leave-one-out (LOO) predictive probabilities.
+#'   }
+#' }
+#'
+#' @examples
+#' color_scheme_set("brightblue")
+#'
+#' # Make an example dataset of binary observations
+#' ymin <- range(example_y_data(), example_yrep_draws())[1]
+#' ymax <- range(example_y_data(), example_yrep_draws())[2]
+#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin))
+#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin)
+#'
+#' ppc_calibration_overlay(y, prep[1:50, ])
+#'
+#' # Compare confidence vs consistency intervals
+#' ppc_calibration(y, prep, interval_type = "confidence")
+#' ppc_calibration(y, prep, interval_type = "consistency")
+NULL
+
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_overlay <- function(
+    y, prep, ..., linewidth = 0.25, alpha = 0.5) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(
+      aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_overlay_grouped <- function(
+    y, prep, group, ..., linewidth = 0.25, alpha = 0.7) {
+  check_ignored_arguments(...)
+  data <- .ppc_calibration_data(y, prep = prep, group = group)
+  ggplot(data) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_line(aes(value, cep, group = rep_id, color = "yrep"),
+      linewidth = linewidth, alpha = alpha
+    ) +
+    facet_wrap(vars(group)) +
+    scale_color_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration <- function(
+    y, prep = NULL, yrep = NULL, prob = .95, interval_type = c("confidence", "consistency"), ...,
+    linewidth = 0.5, alpha = 0.7) {
+  check_ignored_arguments(...)
+  interval_type <- match.arg(interval_type)
+  data <- .ppc_calibration_data(y, prep, yrep, NULL, interval_type) %>%
+    group_by(idx) %>%
+    summarise(
+      value = mean(value),
+      lb = quantile(cep_intervals, .5 - .5 * prob),
+      ub = quantile(cep_intervals, .5 + .5 * prob),
+      cep = mean(cep)
+    )
+
+  ggplot(data) +
+    aes(value, cep) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) +
+    geom_line(
+      aes(color = "y"),
+      linewidth = linewidth
+    ) +
+    scale_color_ppc() +
+    scale_fill_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_calibration_grouped <- function(
+    y,
+    prep = NULL,
+    yrep = NULL,
+    group,
+    prob = .95,
+    interval_type = c("confidence", "consistency"),
+    ...,
+    linewidth = 0.5,
+    alpha = 0.7) {
+  check_ignored_arguments(...)
+  interval_type <- match.arg(interval_type)
+  data <- .ppc_calibration_data(y, prep, yrep, group, interval_type) %>%
+    group_by(group, idx) %>%
+    summarise(
+      value = mean(value),
+      lb = quantile(cep_intervals, .5 - .5 * prob),
+      ub = quantile(cep_intervals, .5 + .5 * prob),
+      cep = mean(cep),
+    ) %>%
+    ungroup()
+
+  ggplot(data) +
+    aes(value, cep) +
+    geom_abline(color = "black", linetype = 2) +
+    geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) +
+    geom_line(
+      aes(color = "y"),
+      linewidth = linewidth
+    ) +
+    facet_wrap(vars(group)) +
+    scale_color_ppc() +
+    scale_fill_ppc() +
+    bayesplot_theme_get() +
+    legend_none() +
+    coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) +
+    xlab("Predicted probability") +
+    ylab("Conditional event probability") +
+    NULL
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_loo_calibration <- function(
+    y, yrep, lw, prob = .95, interval_type = c("confidence", "consistency"), ..., linewidth = 0.5, alpha = 0.7) {
+  check_ignored_arguments(...)
+  # Create LOO-predictive samples using resampling
+  yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL)
+  ppc_calibration(y, yrep = yrep_resampled, prob = prob, interval_type = interval_type, ..., linewidth = linewidth, alpha = alpha)
+}
+
+#' @rdname PPC-calibration
+#' @export
+ppc_loo_calibration_grouped <- function(
+    y,
+    yrep,
+    group,
+    lw,
+    prob = .95,
+    interval_type = c("confidence", "consistency"),
+    ...,
+    linewidth = 0.5,
+    alpha = 0.7) {
+  check_ignored_arguments(...)
+  # Create LOO-predictive samples using resampling
+  yrep_resampled <- .loo_resample_data(yrep, lw, psis_object = NULL)
+  ppc_calibration_grouped(
+    y,
+    yrep = yrep_resampled,
+    group = group,
+    prob = prob,
+    interval_type = interval_type,
+    ...,
+    linewidth = linewidth,
+    alpha = alpha
+  )
+}
+
+.ppc_calibration_data <- function(
+    y,
+    prep = NULL,
+    yrep = NULL,
+    group = NULL,
+    interval_type = c("confidence", "consistency")) {
+  y <- validate_y(y)
+  n_obs <- length(y)
+  interval_type <- match.arg(interval_type)
+
+  # Determine if we're using prep (probabilities) or yrep (predictive samples)
+  if (!is.null(prep)) {
+    predictions <- validate_predictions(prep, n_obs)
+    if (any(prep > 1 | prep < 0)) {
+      stop(
+        "Values of ´prep´ should be predictive probabilities between 0 and 1."
+      )
+    }
+    is_probability <- TRUE
+  } else if (!is.null(yrep)) {
+    predictions <- validate_predictions(yrep, n_obs)
+    is_probability <- FALSE
+  } else {
+    stop("Either 'prep' or 'yrep' must be provided.")
+  }
+
+  if (!is.null(group)) {
+    group <- validate_group(group, n_obs)
+  } else {
+    group <- rep(1, n_obs)
+  }
+
+  data <- .ppd_data(predictions, group = group)
+
+  if (interval_type == "confidence") {
+    if (is_probability) {
+      # confidence interval from predicted probabilities:
+      # cep = cep_intervals = monotone(y[order(ppred)])
+      # i.e. posterior of the calibration curve trajectory
+      # data %>%
+      #   group_by(group, rep_id) %>%
+      #   mutate(
+      #     ord = order(value),
+      #     value = value[ord],
+      #     cep_intervals = .monotone(y[ord]),
+      #     cep = cep_intervals,
+      #     idx = seq_len(n())
+      #   ) %>%
+       data %>%
+        group_by(group) %>%
+        group_modify(.f = pava_transform(.x, .y, y, NULL)) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    } else {
+      ppred <- colMeans(predictions)
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          idx_boot = sample(y_id, n(), replace = TRUE),
+          ord = order(ppred[idx_boot]),
+          value = ppred[idx_boot][ord],
+          cep_intervals = .monotone(y[idx_boot][ord]),
+          cep = cep_intervals,
+          idx = seq_len(n()),
+        ) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    }
+  } else {
+    # Consistency intervals
+    if (is_probability) {
+      # For prep (probabilities), generate predictive samples from binomial
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          # Generate predictive samples from binomial distribution
+          ord = order(value),
+          value = value[ord],
+          cep_intervals = .monotone(rbinom(n(), 1, value)),
+          cep = .monotone(y[ord]),
+          idx = seq_len(n())
+        ) %>%
+        ungroup() %>%
+        select(value, cep, cep_intervals, idx, group)
+    } else {
+      # For yrep (predictive samples), use column averages for ordering
+      ppred <- colMeans(predictions)
+      data %>%
+        group_by(group, rep_id) %>%
+        mutate(
+          # Use column averages for ordering when yrep is provided
+          ord = order(ppred[y_id]),
+          cep_intervals = .monotone(value[ord]),
+          value = ppred[y_id][ord],
+          cep = .monotone(y[y_id][ord]),
+          idx = seq_len(n()),
+        ) %>%
+        ungroup() %>%
+        select(value, cep_intervals, cep, idx, group)
+    }
+  }
+}
+
+.loo_resample_data <- function(yrep, lw, psis_object) {
+  lw <- .get_lw(lw, psis_object)
+  stopifnot(identical(dim(yrep), dim(lw)))
+
+  # Resample each column (observation) with its corresponding weights
+  n_obs <- ncol(yrep)
+  n_draws <- nrow(yrep)
+
+  # Initialize resampled matrix
+  yrep_resampled <- matrix(NA, nrow = n_draws, ncol = n_obs)
+
+  for (i in 1:n_obs) {
+    # Create draws object for this observation
+    draws_i <- posterior::as_draws_matrix(yrep[, i, drop = FALSE])
+
+    # Resample using the weights for this observation
+    weights_i <- lw[, i]
+    resampled_i <- posterior::resample_draws(
+      draws_i,
+      weights = weights_i, ndraws = n_draws
+    )
+
+    # Extract the resampled values
+    yrep_resampled[, i] <- as.numeric(resampled_i)
+  }
+
+  # Add observation names if available
+  if (!is.null(colnames(yrep))) {
+    colnames(yrep_resampled) <- colnames(yrep)
+  }
+
+  yrep_resampled
+}
+
+.monotone <- function(y) {
+  if (requireNamespace("monotone", quietly = TRUE)) {
+    monotone <- monotone::monotone
+  } else {
+    monotone <- function(y) {
+      stats::isoreg(y)$yf
+    }
+  }
+  monotone(y)
+}
+
+pava_transform <- function(.x, .y, y, yrep, interval_type) {
+  if (no_prob) {
+    probs <- .x %>%
+      group_by(y_id) %>%
+      summarise(p = mean(yrep)) %>%
+      arrange(y_id) %>%
+      pull(p)
+    ord <- order(probs)
+    data <- .x |>
+      group_by(rep_id) |>
+      mutate(ord = ord) |>
+      ungroup()
+  } else {
+    data <- data |>
+      group_by(rep_id) |>
+      mutate(ord = order(value)) |>
+      ungroup()
+  }
+  if (interval_type == "confidence") {
+    data %>%
+      group_by(rep_id) %>%
+      mutate(
+        cep_intervals = .monotone(y[y_id][ord_v]),
+        value = ifelse(is.null(yrep), value[ord_v], probs[ord],
+        cep = cep_interval,
+        idx = seq_len(n()),
+      ) %>%
+      ungroup() %>%
+      select(value, cep_intervals, cep, idx, rep_id, y_id)
+  } else {
+    data 
+  }
+}
diff --git a/man/PPC-calibration.Rd b/man/PPC-calibration.Rd
new file mode 100644
index 00000000..fbd9a0c3
--- /dev/null
+++ b/man/PPC-calibration.Rd
@@ -0,0 +1,115 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ppc-calibration.R
+\name{PPC-calibration}
+\alias{PPC-calibration}
+\alias{ppc_calibration_overlay}
+\alias{ppc_calibration_overlay_grouped}
+\alias{ppc_calibration}
+\alias{ppc_calibration_grouped}
+\alias{ppc_loo_calibration}
+\alias{ppc_loo_calibration_grouped}
+\title{Assess the calibration of the predictive distributions \code{yrep} in relation to
+the data `y'.
+See the \strong{Plot Descriptions} section, below, for details.}
+\usage{
+ppc_calibration_overlay(y, prep, ..., linewidth = 0.25, alpha = 0.5)
+
+ppc_calibration_overlay_grouped(
+  y,
+  prep,
+  group,
+  ...,
+  linewidth = 0.25,
+  alpha = 0.7
+)
+
+ppc_calibration(y, prep, prob = 0.95, ..., linewidth = 0.5, alpha = 0.7)
+
+ppc_calibration_grouped(
+  y,
+  prep,
+  group,
+  prob = 0.95,
+  ...,
+  linewidth = 0.5,
+  alpha = 0.7
+)
+
+ppc_loo_calibration(y, yrep, lw, ..., linewidth = 0.25, alpha = 0.7)
+
+ppc_loo_calibration_grouped(
+  y,
+  yrep,
+  group,
+  lw,
+  ...,
+  linewidth = 0.25,
+  alpha = 0.7
+)
+}
+\arguments{
+\item{y}{A vector of observations. See \strong{Details}.}
+
+\item{group}{A grouping variable of the same length as \code{y}.
+Will be coerced to \link[base:factor]{factor} if not already a factor.
+Each value in \code{group} is interpreted as the group level pertaining
+to the corresponding observation.}
+
+\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior)
+predictive distribution. The number of rows, \code{S}, is the size of the
+posterior (or prior) sample used to generate \code{yrep}. The number of columns,
+\code{N} is the number of predicted observations (\code{length(y)}). The columns of
+\code{yrep} should be in the same order as the data points in \code{y} for the plots
+to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for
+additional advice specific to particular plots.}
+}
+\value{
+The plotting functions return a ggplot object that can be further
+customized using the \strong{ggplot2} package. The functions with suffix
+\verb{_data()} return the data that would have been drawn by the plotting
+function.
+}
+\description{
+Assess the calibration of the predictive distributions \code{yrep} in relation to
+the data `y'.
+See the \strong{Plot Descriptions} section, below, for details.
+}
+\section{Plot Descriptions}{
+
+\describe{
+\item{\code{ppc_calibration()},\code{ppc_calibration_grouped()}}{
+
+},
+\item{\code{ppc_calibration_overlay()},\code{ppc_calibration_overlay_grouped()}}{
+
+},
+\item{\code{ppc_loo_calibration()},\code{ppc_loo_calibration_grouped()}}{
+
+}
+}
+}
+
+\examples{
+color_scheme_set("brightblue")
+
+# Make an example dataset of binary observations
+ymin <- range(example_y_data(), example_yrep_draws())[1]
+ymax <- range(example_y_data(), example_yrep_draws())[2]
+y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin))
+prep <- (example_yrep_draws() - ymin) / (ymax - ymin)
+
+ppc_calibration_overlay(y, prep[1:50, ])
+}
+\seealso{
+Other PPCs: 
+\code{\link{PPC-censoring}},
+\code{\link{PPC-discrete}},
+\code{\link{PPC-distributions}},
+\code{\link{PPC-errors}},
+\code{\link{PPC-intervals}},
+\code{\link{PPC-loo}},
+\code{\link{PPC-overview}},
+\code{\link{PPC-scatterplots}},
+\code{\link{PPC-test-statistics}}
+}
+\concept{PPCs}
diff --git a/man/PPC-censoring.Rd b/man/PPC-censoring.Rd
index ddf011b4..30a7b78d 100644
--- a/man/PPC-censoring.Rd
+++ b/man/PPC-censoring.Rd
@@ -150,6 +150,7 @@ doi:10.1080/01621459.1958.10501452.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd
index 434ba7bd..4f2fcabd 100644
--- a/man/PPC-discrete.Rd
+++ b/man/PPC-discrete.Rd
@@ -216,6 +216,7 @@ Visualizing count data regressions using rootograms.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-distributions}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd
index 628a3ad5..991a9742 100644
--- a/man/PPC-distributions.Rd
+++ b/man/PPC-distributions.Rd
@@ -389,6 +389,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-errors}},
diff --git a/man/PPC-errors.Rd b/man/PPC-errors.Rd
index 047590bf..c3196f26 100644
--- a/man/PPC-errors.Rd
+++ b/man/PPC-errors.Rd
@@ -227,6 +227,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd
index ddac2ebc..369ab029 100644
--- a/man/PPC-intervals.Rd
+++ b/man/PPC-intervals.Rd
@@ -240,6 +240,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd
index 10f29d8c..3113fab8 100644
--- a/man/PPC-loo.Rd
+++ b/man/PPC-loo.Rd
@@ -349,6 +349,7 @@ https://www.jstor.org/stable/2986005.
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-overview.Rd b/man/PPC-overview.Rd
index 16d3f0ff..f58465d2 100644
--- a/man/PPC-overview.Rd
+++ b/man/PPC-overview.Rd
@@ -124,6 +124,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-scatterplots.Rd b/man/PPC-scatterplots.Rd
index 2c22bbd3..2e274273 100644
--- a/man/PPC-scatterplots.Rd
+++ b/man/PPC-scatterplots.Rd
@@ -155,6 +155,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd
index 83883979..c9368794 100644
--- a/man/PPC-test-statistics.Rd
+++ b/man/PPC-test-statistics.Rd
@@ -213,6 +213,7 @@ Press, London, third edition. (Ch. 6)
 }
 \seealso{
 Other PPCs: 
+\code{\link{PPC-calibration}},
 \code{\link{PPC-censoring}},
 \code{\link{PPC-discrete}},
 \code{\link{PPC-distributions}},
diff --git a/tests/testthat/test-ppc-calibration.R b/tests/testthat/test-ppc-calibration.R
new file mode 100644
index 00000000..1858ace5
--- /dev/null
+++ b/tests/testthat/test-ppc-calibration.R
@@ -0,0 +1,257 @@
+library(bayesplot)
+context("PPC: calibration")
+
+if (!exists("expect_gg")) expect_gg <- bayesplot:::expect_gg
+
+# Create binary test data for calibration plots
+set.seed(1234)
+n_obs <- 100
+n_draws <- 50
+p_true <- runif(n_obs)
+calib_y <- rbinom(n_obs, 1, p_true)
+calib_prep <- matrix(pmin(1,pmax(0, p_true + rnorm(n_obs * n_draws, 0, .1))), nrow = n_draws, ncol = n_obs)
+calib_yrep <- t(apply(calib_prep, 1, rbinom, n = n_obs, size = 1))
+calib_group <- gl(2, n_obs / 2, labels = c("A", "B"))
+calib_lw <- matrix(runif(n_obs * n_draws), ncol = n_obs, nrow = n_draws)
+
+test_that("ppc_calibration_overlay returns a ggplot object", {
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep))
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1:5, ], 
+                                   linewidth = 0.5, alpha = 0.3))
+})
+
+test_that("ppc_calibration_overlay_grouped returns a ggplot object", {
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group))
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep[1:3, ], calib_group,
+                                           linewidth = 0.5, alpha = 0.3))
+})
+
+test_that("ppc_calibration returns a ggplot object", {
+  expect_gg(ppc_calibration(calib_y, calib_prep))
+  expect_gg(ppc_calibration(calib_y, calib_yrep))
+  expect_gg(ppc_calibration(calib_y, calib_prep, prob = 0.9,
+                           linewidth = 0.8, alpha = 0.5))
+  # Test new interval_type parameter
+  expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "confidence"))
+  expect_gg(ppc_calibration(calib_y, calib_prep, interval_type = "consistency"))
+})
+
+test_that("ppc_calibration_grouped returns a ggplot object", {
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_yrep, calib_group))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep[1:5, ], calib_group,
+                                   prob = 0.9, linewidth = 0.8, alpha = 0.5))
+  # Test new interval_type parameter
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "confidence"))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, calib_group, interval_type = "consistency"))
+})
+
+test_that("ppc_loo_calibration returns a ggplot object", {
+  # Note: This function now returns interval plots instead of overlay plots
+  # Testing basic functionality for now
+  expect_gg(ppc_loo_calibration(calib_y, calib_yrep, calib_prep))
+  expect_gg(ppc_loo_calibration(calib_y, calib_yrep[1:3, ], calib_prep[1:3, ],
+                               prob = 0.9, linewidth = 0.8, alpha = 0.5))
+})
+
+test_that("ppc_loo_calibration_grouped returns a ggplot object", {
+  # Note: This function now returns interval plots instead of overlay plots
+  # Testing basic functionality for now
+  expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep, calib_group, calib_lw))
+  expect_gg(ppc_loo_calibration_grouped(calib_y, calib_prep[1:3, ], calib_group, calib_lw[1:3, ],
+                                       prob = 0.9, linewidth = 0.8, alpha = 0.5))
+})
+
+test_that("calibration functions handle edge cases", {
+  # Single observation
+  expect_gg(ppc_calibration_overlay(1, matrix(0.5, nrow = 1, ncol = 1)))
+  expect_gg(ppc_calibration(1, matrix(0.5, nrow = 1, ncol = 1)))
+  
+  # Single draw
+  expect_gg(ppc_calibration_overlay(calib_y, calib_prep[1, , drop = FALSE]))
+  expect_gg(ppc_calibration(calib_y, calib_prep[1, , drop = FALSE]))
+  
+  # All zeros or ones
+  expect_gg(ppc_calibration_overlay(rep(0, 10), matrix(0.1, nrow = 5, ncol = 10)))
+  expect_gg(ppc_calibration_overlay(rep(1, 10), matrix(0.9, nrow = 5, ncol = 10)))
+})
+
+test_that("calibration functions validate inputs correctly", {
+  # Invalid probabilities (outside [0,1])
+  expect_error(ppc_calibration_overlay(calib_y, matrix(1.5, nrow = 5, ncol = 50)),
+               "Values of ´prep´ should be predictive probabilities between 0 and 1")
+  expect_error(ppc_calibration_overlay(calib_y, matrix(-0.1, nrow = 5, ncol = 50)),
+               "Values of ´prep´ should be predictive probabilities between 0 and 1")
+  
+  # Mismatched dimensions
+  expect_error(ppc_calibration_overlay(calib_y, calib_prep[, 1:25]),
+               "ncol(yrep) must be equal to length(y).", fixed=TRUE)
+  
+  # Invalid group
+  expect_error(ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group[1:25]),
+               "length(group) must be equal to the number of observations.",
+              fixed=TRUE)
+  
+  # Invalid interval_type
+  expect_error(ppc_calibration(calib_y, calib_prep, interval_type = "invalid"),
+               "should be one of")
+})
+
+test_that("calibration functions work with different group types", {
+  # Numeric groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.numeric(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.numeric(calib_group)))
+  
+  # Integer groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.integer(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.integer(calib_group)))
+  
+  # Character groups
+  expect_gg(ppc_calibration_overlay_grouped(calib_y, calib_prep, as.character(calib_group)))
+  expect_gg(ppc_calibration_grouped(calib_y, calib_prep, as.character(calib_group)))
+})
+
+# Visual tests -----------------------------------------------------------------
+
+test_that("ppc_calibration_overlay renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_overlay(calib_y, calib_prep)
+  vdiffr::expect_doppelganger("ppc_calibration_overlay (default)", p_base)
+
+  p_custom <- ppc_calibration_overlay(
+    calib_y,
+    calib_prep,
+    prob = .99,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_overlay (custom)", p_custom)
+})
+
+test_that("ppc_calibration_overlay_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_overlay_grouped(calib_y, calib_prep, calib_group)
+  vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (default)", p_base)
+
+  p_custom <- ppc_calibration_overlay_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    prob = .99,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_overlay_grouped (custom)", p_custom)
+})
+
+test_that("ppc_calibration renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration(calib_y, calib_prep)
+  vdiffr::expect_doppelganger("ppc_calibration (default)", p_base)
+
+  p_custom <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    prob = 0.9,
+    linewidth = 0.8,
+    alpha = 0.5
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (custom)", p_custom)
+  
+  # Test interval_type variants
+  p_confidence <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    interval_type = "confidence"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (confidence)", p_confidence)
+  
+  p_consistency <- ppc_calibration(
+    calib_y,
+    calib_prep,
+    interval_type = "consistency"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration (consistency)", p_consistency)
+})
+
+test_that("ppc_calibration_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_calibration_grouped(calib_y, calib_prep, calib_group)
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (default)", p_base)
+
+  p_custom <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    prob = 0.9,
+    linewidth = 0.8,
+    alpha = 0.5
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (custom)", p_custom)
+  
+  # Test interval_type variants
+  p_confidence <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    interval_type = "confidence"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (confidence)", p_confidence)
+  
+  p_consistency <- ppc_calibration_grouped(
+    calib_y,
+    calib_prep,
+    calib_group,
+    interval_type = "consistency"
+  )
+  vdiffr::expect_doppelganger("ppc_calibration_grouped (consistency)", p_consistency)
+})
+
+test_that("ppc_loo_calibration renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_loo_calibration(calib_y, calib_yrep, calib_lw)
+  vdiffr::expect_doppelganger("ppc_loo_calibration (default)", p_base)
+
+  p_custom <- ppc_loo_calibration(
+    calib_y,
+    calib_yrep,
+    calib_lw,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_loo_calibration (custom)", p_custom)
+})
+
+test_that("ppc_loo_calibration_grouped renders correctly", {
+  testthat::skip_on_cran()
+  testthat::skip_if_not_installed("vdiffr")
+  skip_on_r_oldrel()
+
+  p_base <- ppc_loo_calibration_grouped(calib_y, calib_yrep, calib_group, calib_lw)
+  vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (default)", p_base)
+
+  p_custom <- ppc_loo_calibration_grouped(
+    calib_y,
+    calib_yrep,
+    calib_group,
+    calib_lw,
+    linewidth = 0.5,
+    alpha = 0.3
+  )
+  vdiffr::expect_doppelganger("ppc_loo_calibration_grouped (custom)", p_custom)
+})