diff --git a/NAMESPACE b/NAMESPACE
index 1717dd56..86683f2a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -75,6 +75,7 @@ export(mcmc_nuts_treedepth)
export(mcmc_pairs)
export(mcmc_parcoord)
export(mcmc_parcoord_data)
+export(mcmc_rank_ecdf)
export(mcmc_rank_hist)
export(mcmc_rank_overlay)
export(mcmc_recover_hist)
@@ -131,6 +132,8 @@ export(ppc_loo_pit_data)
export(ppc_loo_pit_overlay)
export(ppc_loo_pit_qq)
export(ppc_loo_ribbon)
+export(ppc_pit_ecdf)
+export(ppc_pit_ecdf_grouped)
export(ppc_ribbon)
export(ppc_ribbon_data)
export(ppc_ribbon_grouped)
diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R
index 71c63e0e..8d0f4fed 100644
--- a/R/helpers-ppc.R
+++ b/R/helpers-ppc.R
@@ -98,6 +98,32 @@ validate_predictions <- function(predictions, n_obs = NULL) {
}
+#' Validate PIT
+#'
+#' Checks that `pit` is numeric, doesn't have any NAs, and is either a vector,
+#' or a 1-D array with values in [0,1].
+#'
+#' @param pit The 'pit' object from the user.
+#' @return Either throws an error or returns a numeric vector.
+#' @noRd
+validate_pit <- function(pit) {
+ stopifnot(is.numeric(pit))
+
+ if (!is_vector_or_1Darray(pit)) {
+ abort("'pit' must be a vector or 1D array.")
+ }
+
+ if (any(pit > 1) || any(pit < 0)) {
+ abort("'pit' must only contain values between 0 and 1.")
+ }
+
+ if (anyNA(pit)) {
+ abort("NAs not allowed in 'pit'.")
+ }
+
+ unname(pit)
+}
+
#' Validate group
#'
#' Checks that grouping variable has correct number of observations and is
@@ -267,6 +293,298 @@ melt_and_stack <- function(y, yrep) {
}
+#' Obtain the coverage parameter for simultaneous confidence bands for ECDFs
+#'
+#' @param N Length of sample.
+#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
+#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
+#' @param prob Desired simultaneous coverage (0,1).
+#' @param M number of simulations to run, if simulation method is used.
+#' @param interpolate_adj Boolean defining whether to interpolate the confidence
+#' bands from precomputed values. Interpolation provides a faster plot with the
+#' trade-off of possible loss of accuracy.
+#' @return The adjusted coverage parameter yielding the desired simultaneous
+#' coverage of the ECDF traces.
+#' @noRd
+adjust_gamma <- function(N,
+ L = 1,
+ K = N,
+ prob = 0.99,
+ M = 1000,
+ interpolate_adj = FALSE) {
+ if (! all_counts(c(K, N, L))) {
+ abort("Parameters 'N', 'L' and 'K' must be positive integers.")
+ }
+ if (prob >= 1 || prob <= 0) {
+ abort("Value of 'prob' must be in (0,1).")
+ }
+ if (interpolate_adj == TRUE) {
+ gamma <- interpolate_gamma(N = N, K = K, prob = prob, L = L)
+ } else if (L == 1) {
+ gamma <- adjust_gamma_optimize(N = N, K = K, prob = prob)
+ } else {
+ gamma <- adjust_gamma_simulate(N = N, L = L, K = K, prob = prob, M = M)
+ }
+ gamma
+}
+
+#' Adjust coverage parameter for single sample using the optimization method.
+#' @param N Length of sample.
+#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
+#' @param prob Desired simultaneous coverage (0,1).
+#' @return The adjusted coverage parameter yielding the desired simultaneous
+#' coverage of the ECDF traces.
+#' @noRd
+adjust_gamma_optimize <- function(N, K, prob) {
+ target <- function(gamma, prob, N, K) {
+ z <- 1:(K - 1) / K
+ z1 <- c(0, z)
+ z2 <- c(z, 1)
+
+ # pre-compute quantiles and use symmetry for increased efficiency.
+ x2_lower <- qbinom(gamma / 2, N, z2)
+ x2_upper <- c(N - rev(x2_lower)[2:K], 1)
+
+ # Compute the total probability of trajectories inside the confidence
+ # intervals. Initialize the set and corresponding probabilities known
+ # to be 0 and 1 for the starting value z1 = 0.
+ x1 <- 0
+ p_int <- 1
+ for (i in seq_along(z1)) {
+ p_int <- p_interior(
+ p_int = p_int,
+ x1 = x1,
+ x2 = x2_lower[i]: x2_upper[i],
+ z1 = z1[i],
+ z2 = z2[i],
+ N = N
+ )
+ x1 <- x2_lower[i]:x2_upper[i]
+ }
+ return(abs(prob - sum(p_int)))
+ }
+ optimize(target, c(0, 1 - prob), prob = prob, N = N, K = K)$minimum
+}
+
+#' Adjust coverage parameter for multiple chains using the simulation method.
+#' In short, 'M' simulations of 'L' standard uniform chains are run and the
+#' confidence bands are set to cover 100 * 'prob' % of these simulations.
+#' @param N Length of sample.
+#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
+#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
+#' @param prob Desired simultaneous coverage (0,1).
+#' @param M number of simulations to run.
+#' @return The adjusted coverage parameter yielding the desired simultaneous
+#' coverage of the ECDF traces.
+#' @noRd
+adjust_gamma_simulate <- function(N, L, K, prob, M) {
+ gamma <- numeric(M)
+ z <- (1:(K - 1)) / K # Rank ECDF evaluation points.
+ n <- N * (L - 1)
+ k <- floor(z * N * L)
+ for (m in seq_len(M)) {
+ u <- u_scale(replicate(L, runif(N))) # Fractional ranks of sample chains
+ scaled_ecdfs <- apply(outer(u, z, "<="), c(2, 3), sum)
+ # Find the smalles marginal probability of the simulation run
+ gamma[m] <- 2 * min(
+ apply(
+ scaled_ecdfs, 1, phyper, m = N, n = n, k = k
+ ),
+ apply(
+ scaled_ecdfs - 1, 1, phyper, m = N, n = n, k = k, lower.tail = FALSE
+ )
+ )
+ }
+ alpha_quantile(gamma, 1 - prob)
+}
+
+#' Approximate the required adjustement to obtain simultaneous confidence bands
+#' of an ECDF plot with interpolation with regards to N and K from precomputed
+#' values for a fixed set of prob and L values.
+#' @param N Length of sample.
+#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
+#' @param prob Desired simultaneous coverage (0,1).
+#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
+#' @return The approximated adjusted coverage parameter yielding the desired
+#' simultaneous coverage of the ECDF traces.
+#' @noRd
+interpolate_gamma <- function(N, K, prob, L) {
+ # Find the precomputed values ueful for the interpolation task.
+ vals <- get_interpolation_values(N, K, L, prob)
+ # Largest lower bound and smalles upper bound for N among precomputed values.
+ N_lb <- max(vals[vals$N <= N, ]$N)
+ N_ub <- min(vals[vals$N >= N, ]$N)
+ # Approximate largest lower bound and smalles upper bound for gamma.
+ log_gamma_lb <- approx(
+ x = log(vals[vals$N == N_lb, ]$K),
+ y = log(vals[vals$N == N_lb, ]$val),
+ xout = log(K)
+ )$y
+ log_gamma_ub <- approx(
+ x = log(vals[vals$N == N_ub, ]$K),
+ y = log(vals[vals$N == N_ub, ]$val),
+ xout = log(K)
+ )$y
+ if (N_ub == N_lb) {
+ log_gamma_approx <- log_gamma_lb
+ } else {
+ # Approximate log_gamma for the desired value of N.
+ log_gamma_approx <- approx(
+ x = log(c(N_lb, N_ub)),
+ y = c(log_gamma_lb, log_gamma_ub),
+ xout = log(N)
+ )$y
+ }
+ exp(log_gamma_approx)
+}
+
+#' Filter the precomputed values useful for the interpolation task given to
+#' interpolate_gamma. Check, if the task is possible with the availabel data.
+#' @param N Length of sample.
+#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
+#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
+#' @param prob Desired simultaneous coverage (0,1).
+#' @return A data.frame containing the relevant precomputed values.
+#' @noRd
+get_interpolation_values <- function(N, K, L, prob) {
+ for (dim in c("L", "prob")) {
+ if (all(get(dim) != bayesplot:::gamma_adj[, dim])) {
+ stop(paste(
+ "No precomputed values to interpolate from for '", dim, "' = ",
+ get(dim),
+ ".\n",
+ "Values of '", dim, "' available for interpolation: ",
+ paste(unique(bayesplot:::gamma_adj[, dim]), collapse = ", "),
+ ".",
+ sep = ""
+ ))
+ }
+ }
+ vals <- bayesplot:::gamma_adj[
+ bayesplot:::gamma_adj$L == L & bayesplot:::gamma_adj$prob == prob,
+ ]
+ if (N > max(vals$N)) {
+ stop(paste(
+ "No precomputed values to interpolate from for sample length of ",
+ N,
+ ".\n",
+ "Please use a subsample of length ",
+ max(vals$N),
+ " or smaller, or consider setting 'interpolate_adj' = FALSE.",
+ sep = ""
+ ))
+ }
+ if (N < min(vals$N)) {
+ stop(paste(
+ "No precomputed values to interpolate from for sample length of ",
+ N,
+ ".\n",
+ "Please use a subsample of length ",
+ min(vals$N),
+ " or larger, or consider setting 'interpolate_adj' = FALSE.",
+ sep = ""
+ ))
+ }
+ if (K > max(vals[vals$N <= N, ]$K)) {
+ stop(paste(
+ "No precomputed values available for interpolation for 'K' = ",
+ K,
+ ".\n",
+ "Try either setting a value of 'K' <= ",
+ max(vals[vals$N <= N, ]$K),
+ "or 'interpolate_adj' = FALSE.",
+ sep = ""
+ ))
+ }
+ if (K < min(vals[vals$N <= N, ]$K)) {
+ stop(paste(
+ "No precomputed values available for interpolation for 'K' = ",
+ K,
+ ".\n",
+ "Try either setting a value of 'K' >= ",
+ min(vals[vals$N <= N, ]$K),
+ "or 'interpolate_adj' = FALSE.",
+ sep = ""
+ ))
+ }
+ vals
+}
+
+#' A helper function for 'adjust_gamma_optimize' defining the probability that
+#' a scaled ECDF stays within the supplied bounds between two evaluation points.
+#' @param p_int For each value in x1, the probability that the ECDF has stayed
+#' within the bounds until z1 and takes the value in x1 at z1.
+#' @param x1 Vector of scaled ECDF values at the left end of the interval, z1.
+#' @param x2 Vector of scaled ECDF values at the right end of the interval, z2.
+#' @param z1 Left evaluation point in [0,1]
+#' @param z2 Right evaluation point in [0,1] with z2 > z1.
+#' @param N Total number of values in the sample.
+#' @return A vector containing the probability to transitioning from the values
+#' in x1 to each of the values in x2 weighted by the probabilities in p_int.
+#' @noRd
+p_interior <- function(p_int, x1, x2, z1, z2, N) {
+ # Ratio between the length of the evaluation interval and the total length of
+ # the interval left to cover by ECDF.
+ z_tilde <- (z2 - z1) / (1 - z1)
+ # Number of samples left to cover by ECDF.
+ N_tilde <- rep(N - x1, each = length(x2))
+
+ p_int <- rep(p_int, each = length(x2))
+ x_diff <- outer(x2, x1, "-")
+ # Pobability of each transition from a value in x1 to a value in x2.
+ p_x2_int <- p_int * dbinom(x_diff, N_tilde, z_tilde)
+ rowSums(p_x2_int)
+}
+
+#' A helper function for 'adjust_alpha_simulate'
+#' 100 * `alpha` percent of the trials in 'gamma' are allowed to be rejected.
+#' In case of ties, return the largest value dominating at most
+#' 100 * (alpha + tol) percent of the values.
+#' @noRd
+alpha_quantile <- function(gamma, alpha, tol = 0.001) {
+ a <- unname(quantile(gamma, probs = alpha))
+ a_tol <- unname(quantile(gamma, probs = alpha + tol))
+ if (a == a_tol) {
+ if (min(gamma) < a) {
+ # take the largest value that doesn't exceed the tolerance.
+ a <- max(gamma[gamma < a])
+ }
+ }
+ a
+}
+
+#' Compute simultaneous confidence intervals with the given adjusted coverage
+#' parameter gamma.
+#' @param gamma Adjusted coverage parameter for the marginal distribution
+#' (binomial for PIT values and hypergeometric for rank transformed chains).
+#' @param N Sample length.
+#' @param K Number of uniformly spaced evaluation points.
+#' @param L Number of MCMC-chains. (1 for ppc)
+#' @return A list with upper and lower confidence interval values at the
+#' evaluation points.
+#' @noRd
+ecdf_intervals <- function(gamma, N, K, L = 1) {
+ lims <- list()
+ z <- seq(0, 1, length.out = K + 1)
+ if (L == 1) {
+ lims$lower <- qbinom(gamma / 2, N, z)
+ lims$upper <- qbinom(1 - gamma / 2, N, z)
+ } else {
+ n <- N * (L - 1)
+ k <- floor(z * L * N)
+ lims$lower <- qhyper(gamma / 2, N, n, k)
+ lims$upper <- qhyper(1 - gamma / 2, N, n, k)
+ }
+ lims
+}
+
+#' Helper for 'adjust_gamma_simulate`
+#' Transforms observations in 'x' into their corresponding fractional ranks.
+#' @noRd
+u_scale <- function(x) {
+ array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x))
+}
+
# labels ----------------------------------------------------------------
create_rep_ids <- function(ids) paste('italic(y)[rep] (', ids, ")")
y_label <- function() expression(italic(y))
diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R
index 285c7b37..ce65db93 100644
--- a/R/mcmc-traces.R
+++ b/R/mcmc-traces.R
@@ -10,6 +10,7 @@
#' @template args-regex_pars
#' @template args-transformations
#' @template args-facet_args
+#' @template args-pit-ecdf
#' @param ... Currently ignored.
#' @param size An optional value to override the default line size
#' for `mcmc_trace()` or the default point size for `mcmc_trace_highlight()`.
@@ -63,9 +64,20 @@
#' Ranks from `mcmc_rank_hist()` are plotted using overlaid lines in a
#' single panel.
#' }
+#' \item{`mcmc_rank_ecdf()`}{
+#' The ECDFs of the ranks from `mcmc_rank_hist()` are plotted with the
+#' simultaneous confidence bands with a coverage determined by `prob`,
+#' that is, bands that completely cover all of the rank ECDFs with the
+#' probability 'prob'.
+#' By default, the difference between the observed rank ECDFs and the
+#' theoretical expectation for samples originating from the same distribution
+#' is drawn.
+#' See Säilynoja et al. (2021) for details.
+#' }
#' }
#'
#' @template reference-improved-rhat
+#' @template reference-uniformity-test
#' @examples
#' # some parameter draws to use for demonstration
#' x <- example_mcmc_draws(chains = 4, params = 6)
@@ -105,6 +117,11 @@
#' mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE)
#' mcmc_rank_overlay(x, "alpha")
#'
+#' # ECDF difference plots of the ranking of MCMC samples between chains.
+#' # Provide 99% simultaneous confidence intervals for the chains sampling from
+#' # the same distribution.
+#' mcmc_rank_ecdf(x, prob = 0.99)
+#'
#' \dontrun{
#' # parse facet label text
#' color_scheme_set("purple")
@@ -421,6 +438,112 @@ mcmc_rank_hist <- function(x,
labs(x = "Rank")
}
+#' @rdname MCMC-traces
+#' @param prob For `mcmc_rank_ecdf()`, a value between 0 and 1
+#' specifying the desired simultaneous confidence of the confidence bands to be
+#' drawn for the rank ECDF plots.
+#' @param plot_diff For `mcmc_rank_ecdf()`, a boolean specifying it the
+#' difference between the observed rank ECDFs and the theoretical expectation
+#' should be drawn instead of the unmodified rank ECDF plots.
+#' @export
+mcmc_rank_ecdf <-
+ function(x,
+ pars = character(),
+ regex_pars = character(),
+ transformations = list(),
+ ...,
+ K = NULL,
+ facet_args = list(),
+ prob = 0.99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE) {
+ check_ignored_arguments(...,
+ ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj", "M")
+ )
+ data <- mcmc_trace_data(
+ x,
+ pars = pars,
+ regex_pars = regex_pars,
+ transformations = transformations,
+ highlight = 1
+ )
+ n_iter <- unique(data$n_iterations)
+ n_chain <- unique(data$n_chains)
+ n_param <- unique(data$n_parameters)
+
+ x <- if (is.null(K)) {
+ 0:n_iter / n_iter
+ } else {
+ 0:K / K
+ }
+ gamma <- adjust_gamma(
+ N = n_iter,
+ L = n_chain,
+ K = if (is.null(K)) {
+ n_iter
+ } else {
+ K
+ },
+ prob = prob,
+ interpolate_adj = interpolate_adj,
+ ...
+ )
+ lims <- ecdf_intervals(
+ gamma = gamma,
+ N = n_iter,
+ K = if (is.null(K)) {
+ n_iter
+ } else {
+ K
+ },
+ L <- n_chain
+ )
+ data_lim <- data.frame(
+ upper = lims$upper / n_iter - (plot_diff == TRUE) * x,
+ lower = lims$lower / n_iter - (plot_diff == TRUE) * x,
+ x = x
+ )
+ data <- data %>%
+ group_by(parameter, chain) %>%
+ dplyr::group_map(~ data.frame(
+ parameter = .y[1],
+ chain = .y[2],
+ ecdf_value = ecdf(.x$value_rank / (n_iter * n_chain))(x) -
+ (plot_diff == TRUE) * x,
+ x = x
+ )) %>%
+ dplyr::bind_rows()
+
+ mapping <- aes_(
+ x = ~x,
+ y = ~ecdf_value,
+ color = ~chain,
+ group = ~chain
+ )
+
+ scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain))
+
+ facet_call <- NULL
+ if (n_param == 1) {
+ facet_call <- ylab(levels(data$parameter))
+ } else {
+ facet_args$facets <- ~parameter
+ facet_args$scales <- facet_args$scales %||% "free"
+ facet_call <- do.call("facet_wrap", facet_args)
+ }
+
+ ggplot() +
+ geom_step(data = data_lim, aes_(x = ~x, y = ~upper), show.legend = FALSE) +
+ geom_step(data = data_lim, aes_(x = ~x, y = ~lower), show.legend = FALSE) +
+ geom_step(mapping, data) +
+ bayesplot_theme_get() +
+ scale_color +
+ facet_call +
+ scale_x_continuous(breaks = pretty) +
+ legend_move(ifelse(n_chain > 1, "right", "none")) +
+ xaxis_title(FALSE) +
+ yaxis_title(on = n_param == 1)
+}
#' @rdname MCMC-traces
#' @export
@@ -604,7 +727,6 @@ mcmc_trace_data <- function(x,
yaxis_title(on = n_param == 1)
}
-
chain_colors <- function(n) {
all_clrs <- unlist(color_scheme_get())
clrs <- switch(
diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R
index cadb62f1..237586b2 100644
--- a/R/ppc-distributions.R
+++ b/R/ppc-distributions.R
@@ -12,6 +12,7 @@
#' @template args-hist
#' @template args-hist-freq
#' @template args-dens
+#' @template args-pit-ecdf
#' @param size,alpha Passed to the appropriate geom to control the appearance of
#' the predictive distributions.
#' @param ... Currently unused.
@@ -47,9 +48,16 @@
#' quantiles. `y` is overlaid on the plot either as a violin, points, or
#' both, depending on the `y_draw` argument.
#' }
+#' \item{`ppc_pit_ecdf()`}{
+#' The ECDF of the empirical PIT values of `y` computed with respect to the
+#' corresponding `yrep` values. `100 * prob`% central simultaneous confidence
+#' intervals are provided to asses if ´y´ and ´yrep´ originate from the same
+#' distribution. The PIT values can also be provided directly as `pit`.
+#' See Säilynoja et al. (2021) for more details.}
#' }
#'
#' @template reference-vis-paper
+#' @template reference-uniformity-test
#' @templateVar bdaRef (Ch. 6)
#' @template reference-bda
#'
@@ -59,16 +67,19 @@
#' yrep <- example_yrep_draws()
#' dim(yrep)
#' ppc_dens_overlay(y, yrep[1:25, ])
-#'
#' \donttest{
#' # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data)
#' ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ])
#' }
+#' # ECDF and ECDF difference plot of the PIT values of ´y´ compared to ´yrep
+#' # with 99% simultaneous confidence bands.
+#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE, interpolate_adj = FALSE)
+#' ppc_pit_ecdf(y, yrep, prob = 0.99, interpolate_adj = FALSE)
+#'
#'
#' # for ppc_hist,dens,freqpoly,boxplot definitely use a subset yrep rows so
#' # only a few (instead of nrow(yrep)) histograms are plotted
#' ppc_hist(y, yrep[1:8, ])
-#'
#' \donttest{
#' color_scheme_set("red")
#' ppc_boxplot(y, yrep[1:8, ])
@@ -79,13 +90,13 @@
#' }
#'
#' # frequency polygons
-#' ppc_freqpoly(y, yrep[1:3,], alpha = 0.1, size = 1, binwidth = 5)
+#' ppc_freqpoly(y, yrep[1:3, ], alpha = 0.1, size = 1, binwidth = 5)
#'
#' group <- example_group_data()
-#' ppc_freqpoly_grouped(y, yrep[1:3,], group) + yaxis_text()
+#' ppc_freqpoly_grouped(y, yrep[1:3, ], group) + yaxis_text()
#' \donttest{
#' # if groups are different sizes then the 'freq' argument can be useful
-#' ppc_freqpoly_grouped(y, yrep[1:3,], group, freq = FALSE) + yaxis_text()
+#' ppc_freqpoly_grouped(y, yrep[1:3, ], group, freq = FALSE) + yaxis_text()
#' }
#'
#' # density and distribution overlays by group
@@ -93,6 +104,10 @@
#'
#' ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group)
#'
+#' # ECDF difference plots of the PIT values by group
+#' # with 99% simultaneous confidence bands.
+#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, interpolate_adj=FALSE)
+#'
#' # don't need to only use small number of rows for ppc_violin_grouped
#' # (as it pools yrep draws within groups)
#' color_scheme_set("gray")
@@ -102,8 +117,10 @@
#'
#' # change how y is drawn
#' ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "points", y_size = 1.5)
-#' ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "both",
-#' y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33)
+#' ppc_violin_grouped(y, yrep, group,
+#' alpha = 0, y_draw = "both",
+#' y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33
+#' )
#' }
NULL
@@ -139,9 +156,9 @@ ppc_dens_overlay <-
check_ignored_arguments(...)
data <- ppc_data(y, yrep)
- ggplot(data, mapping = aes_(x = ~ value)) +
+ ggplot(data, mapping = aes_(x = ~value)) +
overlay_ppd_densities(
- mapping = aes_(group = ~ rep_id, color = "yrep"),
+ mapping = aes_(group = ~rep_id, color = "yrep"),
data = function(x) dplyr::filter(x, !.data$is_y),
size = size,
alpha = alpha,
@@ -175,19 +192,17 @@ ppc_dens_overlay <-
#' @rdname PPC-distributions
#' @export
#' @template args-density-controls
-ppc_dens_overlay_grouped <- function(
- y,
- yrep,
- group,
- ...,
- size = 0.25,
- alpha = 0.7,
- trim = FALSE,
- bw = "nrd0",
- adjust = 1,
- kernel = "gaussian",
- n_dens = 1024
-) {
+ppc_dens_overlay_grouped <- function(y,
+ yrep,
+ group,
+ ...,
+ size = 0.25,
+ alpha = 0.7,
+ trim = FALSE,
+ bw = "nrd0",
+ adjust = 1,
+ kernel = "gaussian",
+ n_dens = 1024) {
check_ignored_arguments(...)
p_overlay <- ppc_dens_overlay(
@@ -221,20 +236,18 @@ ppc_dens_overlay_grouped <- function(
#' `TRUE` then `geom="step"` is used.
#' @param pad A logical scalar passed to [ggplot2::stat_ecdf()].
#'
-ppc_ecdf_overlay <- function(
- y,
- yrep,
- ...,
- discrete = FALSE,
- pad = TRUE,
- size = 0.25,
- alpha = 0.7
-) {
+ppc_ecdf_overlay <- function(y,
+ yrep,
+ ...,
+ discrete = FALSE,
+ pad = TRUE,
+ size = 0.25,
+ alpha = 0.7) {
check_ignored_arguments(...)
data <- ppc_data(y, yrep)
ggplot(data) +
- aes_(x = ~ value) +
+ aes_(x = ~value) +
hline_at(
0.5,
size = 0.1,
@@ -249,7 +262,7 @@ ppc_ecdf_overlay <- function(
) +
stat_ecdf(
data = function(x) dplyr::filter(x, !.data$is_y),
- mapping = aes_(group = ~ rep_id, color = "yrep"),
+ mapping = aes_(group = ~rep_id, color = "yrep"),
geom = if (discrete) "step" else "line",
size = size,
alpha = alpha,
@@ -262,25 +275,23 @@ ppc_ecdf_overlay <- function(
size = 1,
pad = pad
) +
- scale_color_ppc() +
- scale_y_continuous(breaks = c(0, 0.5, 1)) +
- bayesplot_theme_get() +
- yaxis_title(FALSE) +
- xaxis_title(FALSE)
- }
+ scale_color_ppc() +
+ scale_y_continuous(breaks = c(0, 0.5, 1)) +
+ bayesplot_theme_get() +
+ yaxis_title(FALSE) +
+ xaxis_title(FALSE)
+}
#' @export
#' @rdname PPC-distributions
-ppc_ecdf_overlay_grouped <- function(
- y,
- yrep,
- group,
- ...,
- discrete = FALSE,
- pad = TRUE,
- size = 0.25,
- alpha = 0.7
-) {
+ppc_ecdf_overlay_grouped <- function(y,
+ yrep,
+ group,
+ ...,
+ discrete = FALSE,
+ pad = TRUE,
+ size = 0.25,
+ alpha = 0.7) {
check_ignored_arguments(...)
p_overlay <- ppc_ecdf_overlay(
@@ -315,9 +326,9 @@ ppc_dens <-
check_ignored_arguments(...)
data <- ppc_data(y, yrep)
ggplot(data, mapping = aes_(
- x = ~ value,
- fill = ~ is_y_label,
- color = ~ is_y_label
+ x = ~value,
+ fill = ~is_y_label,
+ color = ~is_y_label
)) +
geom_density(
size = size,
@@ -354,8 +365,8 @@ ppc_hist <-
data <- ppc_data(y, yrep)
ggplot(data, mapping = set_hist_aes(
freq = freq,
- fill = ~ is_y_label,
- color = ~ is_y_label
+ fill = ~is_y_label,
+ color = ~is_y_label
)) +
geom_histogram(
size = 0.25,
@@ -388,7 +399,6 @@ ppc_freqpoly <-
freq = TRUE,
size = 0.5,
alpha = 1) {
-
dots <- list(...)
if (!from_grouped(dots)) {
check_ignored_arguments(...)
@@ -398,8 +408,8 @@ ppc_freqpoly <-
data <- ppc_data(y, yrep, group = dots$group)
ggplot(data, mapping = set_hist_aes(
freq = freq,
- fill = ~ is_y_label,
- color = ~ is_y_label
+ fill = ~is_y_label,
+ color = ~is_y_label
)) +
geom_area(
stat = "bin",
@@ -465,19 +475,19 @@ ppc_boxplot <-
data <- ppc_data(y, yrep)
ggplot(data, mapping = aes_(
- x = ~ rep_label,
- y = ~ value,
- fill = ~ is_y_label,
- color = ~ is_y_label
+ x = ~rep_label,
+ y = ~value,
+ fill = ~is_y_label,
+ color = ~is_y_label
)) +
geom_boxplot(
notch = notch,
size = size,
alpha = alpha,
- outlier.alpha = 2/3,
+ outlier.alpha = 2 / 3,
outlier.size = 1
) +
- scale_x_discrete(labels = function(x) parse(text=x)) +
+ scale_x_discrete(labels = function(x) parse(text = x)) +
scale_fill_ppc() +
scale_color_ppc() +
bayesplot_theme_get() +
@@ -520,7 +530,7 @@ ppc_violin_grouped <-
y_points <- y_draw %in% c("points", "both")
args_violin_yrep <- list(
- data = function(x) dplyr::filter(x,!.data$is_y),
+ data = function(x) dplyr::filter(x, !.data$is_y),
aes_(fill = "yrep", color = "yrep"),
draw_quantiles = probs,
alpha = alpha,
@@ -553,7 +563,8 @@ ppc_violin_grouped <-
layer_jitter_y <- do.call(jitter_y_func, args_jitter_y)
data <- ppc_data(y, yrep, group)
- ggplot(data, mapping = aes_(x = ~ group, y = ~ value)) +
+
+ ggplot(data, mapping = aes_(x = ~group, y = ~value)) +
layer_violin_yrep +
layer_violin_y +
layer_jitter_y +
@@ -563,3 +574,157 @@ ppc_violin_grouped <-
xaxis_title(FALSE) +
bayesplot_theme_get()
}
+
+
+#' @export
+#' @param pit An optional vector of probability integral transformed values for
+#' which the ECDF is to be drawn. If NULL, PIT values are computed to ´y´ with
+#' respect to the corresponding values in ´yrep´.
+#' @rdname PPC-distributions
+#'
+ppc_pit_ecdf <- function(y,
+ yrep,
+ ...,
+ pit = NULL,
+ K = NULL,
+ prob = .99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE) {
+ check_ignored_arguments(...,
+ ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj")
+ )
+
+ if (is.null(pit)) {
+ pit <- ppc_data(y, yrep) %>%
+ group_by(y_id) %>%
+ dplyr::group_map(~ mean(.x$value[.x$is_y] >= .x$value[!.x$is_y])) %>%
+ unlist()
+ if (is.null(K)) {
+ K <- min(
+ length(unique(ppc_data(y, yrep)$rep_id)) + 1,
+ length(pit)
+ )
+ }
+ } else {
+ inform("'pit' specified so ignoring 'y', and 'yrep' if specified.")
+ pit <- validate_pit(pit)
+ if (is.null(K)) {
+ K <- length(pit)
+ }
+ }
+ N <- length(pit)
+ gamma <- adjust_gamma(
+ N = N,
+ K = K,
+ prob = prob,
+ interpolate_adj = interpolate_adj
+ )
+ lims <- ecdf_intervals(gamma = gamma, N = N, K = K)
+ ggplot() +
+ aes_(
+ x = 1:K / K,
+ y = ecdf(pit)(seq(0, 1, length.out = K)) - (plot_diff == TRUE) * 1:K / K,
+ color = "y"
+ ) +
+ geom_step(show.legend = FALSE) +
+ geom_step(aes(
+ y = lims$upper[-1] / N - (plot_diff == TRUE) * 1:K / K,
+ color = "yrep"
+ ), show.legend = FALSE) +
+ geom_step(aes(
+ y = lims$lower[-1] / N - (plot_diff == TRUE) * 1:K / K,
+ color = "yrep"
+ ), show.legend = FALSE) +
+ yaxis_title(FALSE) +
+ xaxis_title(FALSE) +
+ yaxis_ticks(FALSE) +
+ scale_color_ppc() +
+ bayesplot_theme_get()
+}
+
+#' @export
+#' @rdname PPC-distributions
+#'
+ppc_pit_ecdf_grouped <-
+ function(y,
+ yrep,
+ group,
+ ...,
+ K = NULL,
+ pit = NULL,
+ prob = .99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE) {
+ check_ignored_arguments(...,
+ ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj")
+ )
+
+ if (is.null(pit)) {
+ pit <- ppc_data(y, yrep, group) %>%
+ group_by(y_id) %>%
+ dplyr::group_map(~ mean(.x$value[.x$is_y] >= .x$value[!.x$is_y])) %>%
+ unlist()
+ if (is.null(K)) {
+ K <- length(unique(ppc_data(y, yrep)$rep_id)) + 1
+ }
+ } else {
+ inform("'pit' specified so ignoring 'y' and 'yrep' if specified.")
+ pit <- validate_pit(pit)
+ }
+ N <- length(pit)
+
+ gammas <- lapply(unique(group), function(g) {
+ N_g <- sum(group == g)
+ adjust_gamma(
+ N = N_g,
+ K = min(N_g, K),
+ prob = prob,
+ interpolate_adj = interpolate_adj
+ )
+ })
+ names(gammas) <- unique(group)
+
+ data <- data.frame(pit = pit, group = group) %>%
+ group_by(group) %>%
+ dplyr::group_map(~ data.frame(
+ ecdf_value = ecdf(.x$pit)(
+ seq(0, 1, length.out = min(nrow(.x), K))),
+ group = .y[1],
+ lims_upper = ecdf_intervals(
+ gamma = gammas[[unlist(.y[1])]],
+ N = nrow(.x),
+ K = min(nrow(.x), K)
+ )$upper[-1] / nrow(.x),
+ lims_lower = ecdf_intervals(
+ gamma = gammas[[unlist(.y[1])]],
+ N = nrow(.x),
+ K = min(nrow(.x), K)
+ )$lower[-1] / nrow(.x),
+ x = seq(0, 1, length.out = min(nrow(.x), K))
+ )) %>%
+ dplyr::bind_rows()
+
+ ggplot(data) +
+ aes_(
+ x = ~x,
+ y = ~ ecdf_value - (plot_diff == TRUE) * x,
+ group = ~group,
+ color = "y"
+ ) +
+ geom_step(show.legend = FALSE) +
+ geom_step(aes_(
+ y = ~ lims_upper - (plot_diff == TRUE) * x,
+ color = "yrep"
+ ), show.legend = FALSE) +
+ geom_step(aes_(
+ y = ~ lims_lower - (plot_diff == TRUE) * x,
+ color = "yrep"
+ ), show.legend = FALSE) +
+ xaxis_title(FALSE) +
+ yaxis_title(FALSE) +
+ yaxis_ticks(FALSE) +
+ bayesplot_theme_get() +
+ facet_wrap("group") +
+ scale_color_ppc() +
+ force_axes_in_facets()
+ }
diff --git a/R/sysdata.rda b/R/sysdata.rda
index 7213031e..6acb55b3 100644
Binary files a/R/sysdata.rda and b/R/sysdata.rda differ
diff --git a/man-roxygen/args-pit-ecdf.R b/man-roxygen/args-pit-ecdf.R
new file mode 100644
index 00000000..587deac6
--- /dev/null
+++ b/man-roxygen/args-pit-ecdf.R
@@ -0,0 +1,12 @@
+#' @param K An optional integer defining the number of equally spaced evaluation
+#' points for the ECDF. If the submitted PIT values are known to be discrete,
+#' this should be the number of the discrete cases. Defaults to the smaller of
+#' ´length(y)´ and ´ncol(yrep)´ when applicable.
+#' @param prob The desired simultaneous coverage level of the bands around the
+#' ECDF. A value in (0,1).
+#' @param plot_diff A boolean defining wether to plot the difference between the
+#' observed ECDF and the theoretical expectation for uniform PIT values.
+#' @param interpolate_adj A boolean defining if the simultaneous confidence
+#' bands should be computed or interpolated from precomputed values. Computing
+#' the bands may be computationally intensive and approximation gives a fast
+#' method for assessing the ECDF trajectory.
diff --git a/man-roxygen/reference-uniformity-test.R b/man-roxygen/reference-uniformity-test.R
new file mode 100644
index 00000000..c18e209a
--- /dev/null
+++ b/man-roxygen/reference-uniformity-test.R
@@ -0,0 +1,4 @@
+#' @references Säilynoja, T., Bürkner, P., Vehtari, A.
+#' (2021). Graphical Test for Discrete Uniformity and its Applications in
+#' Goodness of Fit Evaluation and Multiple Sample Comparison [arXiv
+#' preprint](https://arxiv.org/abs/2103.10522).
diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd
index 68d94198..e1cc67e9 100644
--- a/man/MCMC-traces.Rd
+++ b/man/MCMC-traces.Rd
@@ -7,6 +7,7 @@
\alias{trace_style_np}
\alias{mcmc_rank_overlay}
\alias{mcmc_rank_hist}
+\alias{mcmc_rank_ecdf}
\alias{mcmc_trace_data}
\title{Trace and rank plots of MCMC draws}
\usage{
@@ -64,6 +65,19 @@ mcmc_rank_hist(
ref_line = FALSE
)
+mcmc_rank_ecdf(
+ x,
+ pars = character(),
+ regex_pars = character(),
+ transformations = list(),
+ ...,
+ K = NULL,
+ facet_args = list(),
+ prob = 0.99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE
+)
+
mcmc_trace_data(
x,
pars = character(),
@@ -178,6 +192,24 @@ of rank-normalized MCMC samples. Defaults to \code{20}.}
\item{ref_line}{For the rank plots, whether to draw a horizontal line at the
average number of ranks per bin. Defaults to \code{FALSE}.}
+
+\item{K}{An optional integer defining the number of equally spaced evaluation
+points for the ECDF. If the submitted PIT values are known to be discrete,
+this should be the number of the discrete cases. Defaults to the smaller of
+´length(y)´ and ´ncol(yrep)´ when applicable.}
+
+\item{prob}{For \code{mcmc_rank_ecdf()}, a value between 0 and 1
+specifying the desired simultaneous confidence of the confidence bands to be
+drawn for the rank ECDF plots.}
+
+\item{plot_diff}{For \code{mcmc_rank_ecdf()}, a boolean specifying it the
+difference between the observed rank ECDFs and the theoretical expectation
+should be drawn instead of the unmodified rank ECDF plots.}
+
+\item{interpolate_adj}{A boolean defining if the simultaneous confidence
+bands should be computed or interpolated from precomputed values. Computing
+the bands may be computationally intensive and approximation gives a fast
+method for assessing the ECDF trajectory.}
}
\value{
The plotting functions return a ggplot object that can be further
@@ -214,6 +246,16 @@ See Vehtari et al. (2019) for details.
Ranks from \code{mcmc_rank_hist()} are plotted using overlaid lines in a
single panel.
}
+\item{\code{mcmc_rank_ecdf()}}{
+The ECDFs of the ranks from \code{mcmc_rank_hist()} are plotted with the
+simultaneous confidence bands with a coverage determined by \code{prob},
+that is, bands that completely cover all of the rank ECDFs with the
+probability 'prob'.
+By default, the difference between the observed rank ECDFs and the
+theoretical expectation for samples originating from the same distribution
+is drawn.
+See Säilynoja et al. (2021) for details.
+}
}
}
@@ -256,6 +298,11 @@ mcmc_rank_hist(x, "alpha")
mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE)
mcmc_rank_overlay(x, "alpha")
+# ECDF difference plots of the ranking of MCMC samples between chains.
+# Provide 99\% simultaneous confidence intervals for the chains sampling from
+# the same distribution.
+mcmc_rank_ecdf(x, prob = 0.99)
+
\dontrun{
# parse facet label text
color_scheme_set("purple")
@@ -302,6 +349,10 @@ mcmc_trace(
Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., Bürkner, P.
(2019). Rank-normalization, folding, and localization: An improved \emph{R}-hat
for assessing convergence of MCMC. \href{https://arxiv.org/abs/1903.08008}{arXiv preprint}.
+
+Säilynoja, T., Bürkner, P., Vehtari, A.
+(2021). Graphical Test for Discrete Uniformity and its Applications in
+Goodness of Fit Evaluation and Multiple Sample Comparison \href{https://arxiv.org/abs/2103.10522}{arXiv preprint}.
}
\seealso{
Other MCMC:
diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd
index 43660409..c197956a 100644
--- a/man/PPC-distributions.Rd
+++ b/man/PPC-distributions.Rd
@@ -13,6 +13,8 @@
\alias{ppc_freqpoly_grouped}
\alias{ppc_boxplot}
\alias{ppc_violin_grouped}
+\alias{ppc_pit_ecdf}
+\alias{ppc_pit_ecdf_grouped}
\title{PPC distributions}
\usage{
ppc_data(y, yrep, group = NULL)
@@ -97,6 +99,29 @@ ppc_violin_grouped(
y_alpha = 1,
y_jitter = 0.1
)
+
+ppc_pit_ecdf(
+ y,
+ yrep,
+ ...,
+ pit = NULL,
+ K = NULL,
+ prob = 0.99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE
+)
+
+ppc_pit_ecdf_grouped(
+ y,
+ yrep,
+ group,
+ ...,
+ K = NULL,
+ pit = NULL,
+ prob = 0.99,
+ plot_diff = TRUE,
+ interpolate_adj = TRUE
+)
}
\arguments{
\item{y}{A vector of observations. See \strong{Details}.}
@@ -160,6 +185,26 @@ horizontal lines. Set to \code{NULL} to remove the lines.}
to to the \code{size}, \code{alpha}, and \code{width} arguments of \code{\link[ggplot2:geom_jitter]{ggplot2::geom_jitter()}}
to control the appearance of \code{y} points. The default of \code{y_jitter=NULL}
will let \strong{ggplot2} determine the amount of jitter.}
+
+\item{pit}{An optional vector of probability integral transformed values for
+which the ECDF is to be drawn. If NULL, PIT values are computed to ´y´ with
+respect to the corresponding values in ´yrep´.}
+
+\item{K}{An optional integer defining the number of equally spaced evaluation
+points for the ECDF. If the submitted PIT values are known to be discrete,
+this should be the number of the discrete cases. Defaults to the smaller of
+´length(y)´ and ´ncol(yrep)´ when applicable.}
+
+\item{prob}{The desired simultaneous coverage level of the bands around the
+ECDF. A value in (0,1).}
+
+\item{plot_diff}{A boolean defining wether to plot the difference between the
+observed ECDF and the theoretical expectation for uniform PIT values.}
+
+\item{interpolate_adj}{A boolean defining if the simultaneous confidence
+bands should be computed or interpolated from precomputed values. Computing
+the bands may be computationally intensive and approximation gives a fast
+method for assessing the ECDF trajectory.}
}
\value{
The plotting functions return a ggplot object that can be further
@@ -205,6 +250,12 @@ variable is plotted as a violin with horizontal lines at notable
quantiles. \code{y} is overlaid on the plot either as a violin, points, or
both, depending on the \code{y_draw} argument.
}
+\item{\code{ppc_pit_ecdf()}}{
+The ECDF of the empirical PIT values of \code{y} computed with respect to the
+corresponding \code{yrep} values. \code{100 * prob}\% central simultaneous confidence
+intervals are provided to asses if ´y´ and ´yrep´ originate from the same
+distribution. The PIT values can also be provided directly as \code{pit}.
+See Säilynoja et al. (2021) for more details.}
}
}
@@ -214,16 +265,19 @@ y <- example_y_data()
yrep <- example_yrep_draws()
dim(yrep)
ppc_dens_overlay(y, yrep[1:25, ])
-
\donttest{
# ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data)
ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ])
}
+# ECDF and ECDF difference plot of the PIT values of ´y´ compared to ´yrep
+# with 99\% simultaneous confidence bands.
+ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE, interpolate_adj = FALSE)
+ppc_pit_ecdf(y, yrep, prob = 0.99, interpolate_adj = FALSE)
+
# for ppc_hist,dens,freqpoly,boxplot definitely use a subset yrep rows so
# only a few (instead of nrow(yrep)) histograms are plotted
ppc_hist(y, yrep[1:8, ])
-
\donttest{
color_scheme_set("red")
ppc_boxplot(y, yrep[1:8, ])
@@ -234,13 +288,13 @@ ppc_dens(y, yrep[200:202, ])
}
# frequency polygons
-ppc_freqpoly(y, yrep[1:3,], alpha = 0.1, size = 1, binwidth = 5)
+ppc_freqpoly(y, yrep[1:3, ], alpha = 0.1, size = 1, binwidth = 5)
group <- example_group_data()
-ppc_freqpoly_grouped(y, yrep[1:3,], group) + yaxis_text()
+ppc_freqpoly_grouped(y, yrep[1:3, ], group) + yaxis_text()
\donttest{
# if groups are different sizes then the 'freq' argument can be useful
-ppc_freqpoly_grouped(y, yrep[1:3,], group, freq = FALSE) + yaxis_text()
+ppc_freqpoly_grouped(y, yrep[1:3, ], group, freq = FALSE) + yaxis_text()
}
# density and distribution overlays by group
@@ -248,6 +302,10 @@ ppc_dens_overlay_grouped(y, yrep[1:25, ], group = group)
ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group)
+# ECDF difference plots of the PIT values by group
+# with 99\% simultaneous confidence bands.
+ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, interpolate_adj=FALSE)
+
# don't need to only use small number of rows for ppc_violin_grouped
# (as it pools yrep draws within groups)
color_scheme_set("gray")
@@ -257,8 +315,10 @@ ppc_violin_grouped(y, yrep, group, alpha = 0)
# change how y is drawn
ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "points", y_size = 1.5)
-ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "both",
- y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33)
+ppc_violin_grouped(y, yrep, group,
+ alpha = 0, y_draw = "both",
+ y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33
+)
}
}
\references{
@@ -269,6 +329,10 @@ Gelman, A. (2019), Visualization in Bayesian workflow.
\href{https://arxiv.org/abs/1709.01449}{arXiv preprint},
\href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub})
+Säilynoja, T., Bürkner, P., Vehtari, A.
+(2021). Graphical Test for Discrete Uniformity and its Applications in
+Goodness of Fit Evaluation and Multiple Sample Comparison \href{https://arxiv.org/abs/2103.10522}{arXiv preprint}.
+
Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari,
A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC
Press, London, third edition. (Ch. 6)
diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg
new file mode 100644
index 00000000..dc312b96
--- /dev/null
+++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg
@@ -0,0 +1,130 @@
+
+
diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg
new file mode 100644
index 00000000..8c58ac31
--- /dev/null
+++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg
@@ -0,0 +1,130 @@
+
+
diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg
new file mode 100644
index 00000000..f5bc0c0f
--- /dev/null
+++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg
@@ -0,0 +1,72 @@
+
+
diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg
new file mode 100644
index 00000000..d41432e1
--- /dev/null
+++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg
@@ -0,0 +1,72 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg
new file mode 100644
index 00000000..d82c3428
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg
@@ -0,0 +1,53 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg
new file mode 100644
index 00000000..261e6496
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg
@@ -0,0 +1,155 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg
new file mode 100644
index 00000000..eb737933
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg
@@ -0,0 +1,163 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg
new file mode 100644
index 00000000..258e9f55
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg
@@ -0,0 +1,57 @@
+
+
diff --git a/tests/testthat/test-helpers-ppc.R b/tests/testthat/test-helpers-ppc.R
index 032b65c2..f1e740cb 100644
--- a/tests/testthat/test-helpers-ppc.R
+++ b/tests/testthat/test-helpers-ppc.R
@@ -2,6 +2,7 @@ library(bayesplot)
context("PPC: misc. functions")
source(test_path("data-for-ppc-tests.R"))
+source(test_path("data-for-mcmc-tests.R"))
# melt_predictions ---------------------------------------------------------------
expect_molten_yrep <- function(yrep) {
@@ -54,6 +55,7 @@ test_that("is_whole_number works correctly", {
c(rep(TRUE, 3), FALSE))
expect_true(!is_whole_number("1"))
})
+
test_that("all_counts works correctly", {
expect_true(all_counts(1))
expect_true(all_counts(0:5))
@@ -64,3 +66,55 @@ test_that("all_counts works correctly", {
expect_false(all_counts(c(-1, 2)))
})
+# adjust_gamma
+
+test_that("adjust_gamma works with different adjustment methods", {
+ set.seed(8420)
+
+ expect_equal(
+ adjust_gamma(N = 100, K = 100, L = 1, prob = .99),
+ adjust_gamma(N = 100, K = 100, L = 1, prob = .99, interpolate_adj = TRUE),
+ tolerance = 1e-3
+ )
+
+ expect_equal(
+ adjust_gamma(N = 100, K = 100, L = 4, prob = .99, M = 1000),
+ adjust_gamma(N = 100, K = 100, L = 4, prob = .99, interpolate_adj = TRUE),
+ tolerance = 1e-3
+ )
+
+ set.seed(NULL)
+})
+
+
+# get_interpolation_values ------------------------------------------------
+test_that("get_interpolation_values catches impossible values", {
+ expect_error(
+ get_interpolation_values(1000, 1000, 0, .5),
+ "No precomputed values to interpolate from for 'L' = 0."
+ )
+ expect_error(
+ get_interpolation_values(1000, 1000, 4, 0),
+ "No precomputed values to interpolate from for 'prob' = 0."
+ )
+ expect_error(
+ get_interpolation_values(1000, 0, 4, .95),
+ "No precomputed values available for interpolation for 'K' = 0."
+ )
+ expect_error(
+ get_interpolation_values(0, 1000, 4, .95),
+ "No precomputed values to interpolate from for sample length of 0."
+ )
+})
+
+# ecdf_intervals ---------------------------------------------------------
+test_that("ecdf_intervals returns right dimensions and values", {
+ lims <- ecdf_intervals(.0001, N = 100, K = 100, L = 1)
+ expect_named(lims, c("lower", "upper"))
+ expect_length(lims$upper, 101)
+ expect_length(lims$lower, 101)
+ expect_equal(min(lims$upper), 0)
+ expect_equal(max(lims$upper), 100)
+ expect_equal(min(lims$lower), 0)
+ expect_equal(max(lims$lower), 100)
+})
diff --git a/tests/testthat/test-mcmc-traces.R b/tests/testthat/test-mcmc-traces.R
index 82dcec1e..34489c9e 100644
--- a/tests/testthat/test-mcmc-traces.R
+++ b/tests/testthat/test-mcmc-traces.R
@@ -33,6 +33,17 @@ test_that("mcmc_trace_highlight throws error if highlight > number of chains", {
expect_error(mcmc_trace_highlight(arr, pars = "sigma", highlight = 7), "'highlight' is 7")
})
+test_that("mcmc_rank_ecdf returns a ggplot object", {
+ expect_gg(mcmc_rank_ecdf(arr, regex_pars = c("beta", "x\\:")))
+ expect_gg(mcmc_rank_ecdf(dframe_multiple_chains, interpolate_adj = FALSE))
+})
+
+test_that("mcmc_rank_ecdf throws error if 1 chain but multiple chains required", {
+ expect_error(mcmc_rank_ecdf(mat), "requires multiple chains")
+ expect_error(mcmc_rank_ecdf(dframe), "requires multiple chains")
+ expect_error(mcmc_rank_ecdf(arr1chain), "requires multiple chains")
+})
+
# options -----------------------------------------------------------------
test_that("mcmc_trace options work", {
expect_gg(g1 <- mcmc_trace(arr, regex_pars = "beta", window = c(5, 10)))
@@ -47,6 +58,12 @@ test_that("mcmc_trace options work", {
expect_error(mcmc_trace(arr, n_warmup = 50, iter1 = 20))
})
+test_that("mcmc_rank_ecdf options work", {
+ expect_error(
+ mcmc_rank_ecdf(dframe_multiple_chains, interpolate_adj = TRUE),
+ "No precomputed values"
+ )
+})
# displaying divergences in traceplot -------------------------------------
test_that("mcmc_trace 'np' argument works", {
@@ -207,6 +224,34 @@ test_that("mcmc_trace_highlight renders correctly", {
vdiffr::expect_doppelganger("mcmc_trace_highlight (alpha)", p_alpha)
})
+test_that("mcmc_rank_ecdf renders correctly", {
+ testthat::skip_on_cran()
+ testthat::skip_if_not_installed("vdiffr")
+
+ p_base <- mcmc_rank_ecdf(vdiff_dframe_chains, pars = c("V1", "V2"))
+ p_one_param <- mcmc_rank_ecdf(vdiff_dframe_chains, pars = "V1")
+
+ p_no_diff <- mcmc_rank_ecdf(
+ vdiff_dframe_chains,
+ pars = c("V1", "V2"),
+ plot_diff = FALSE
+ )
+
+ p_no_diff_one_param <- mcmc_rank_ecdf(
+ vdiff_dframe_chains,
+ pars = "V1",
+ plot_diff = FALSE
+ )
+
+ vdiffr::expect_doppelganger("mcmc_rank_ecdf (default)", p_base)
+ vdiffr::expect_doppelganger("mcmc_rank_ecdf (one parameter)", p_one_param)
+ vdiffr::expect_doppelganger("mcmc_rank_ecdf (no diff)", p_no_diff)
+ vdiffr::expect_doppelganger(
+ "mcmc_rank_ecdf (one param no diff)",
+ p_no_diff_one_param
+ )
+})
+
test_that("mcmc_trace with 'np' renders correctly", {
testthat::skip_on_cran()
testthat::skip_if_not_installed("vdiffr")
diff --git a/tests/testthat/test-ppc-distributions.R b/tests/testthat/test-ppc-distributions.R
index 047e59cf..7f0be3e8 100644
--- a/tests/testthat/test-ppc-distributions.R
+++ b/tests/testthat/test-ppc-distributions.R
@@ -58,6 +58,11 @@ test_that("ppc_dens,pp_hist,ppc_freqpoly,ppc_boxplot return ggplot objects", {
expect_gg(ppd_boxplot(yrep2, notch = FALSE))
})
+test_that("ppc_pit_ecdf, ppc_pit_ecdf_grouped returns a ggplot object", {
+ expect_gg(ppc_pit_ecdf(y, yrep, interpolate_adj = FALSE))
+ expect_gg(ppc_pit_ecdf_grouped(y, yrep, group = group, interpolate_adj = FALSE))
+})
+
test_that("ppc_freqpoly_grouped returns a ggplot object", {
expect_gg(ppc_freqpoly_grouped(y, yrep[1:4, ], group))
expect_gg(ppc_freqpoly_grouped(y, yrep[1:4, ], group,
@@ -286,3 +291,18 @@ test_that("ppc_violin_grouped renders correctly", {
set.seed(seed = NULL)
})
+
+test_that("ppc_pit_ecdf, ppc_pit_ecdf_grouped renders correctly", {
+ testthat::skip_on_cran()
+ testthat::skip_if_not_installed("vdiffr")
+
+ p_base <- ppc_pit_ecdf(y, yrep, interpolate_adj = FALSE)
+ g_base <- ppc_pit_ecdf_grouped(y, yrep, group = group, interpolate_adj = FALSE)
+ p_diff <- ppc_pit_ecdf(y, yrep, plot_diff = FALSE, interpolate_adj = FALSE)
+ g_diff <- ppc_pit_ecdf_grouped(y, yrep, plot_diff = FALSE, group = group, interpolate_adj = FALSE)
+
+ vdiffr::expect_doppelganger("ppc_pit_ecdf (default)", p_base)
+ vdiffr::expect_doppelganger("ppc_pit_ecdf_grouped (default)", g_base)
+ vdiffr::expect_doppelganger("ppc_pit_ecdf (no diff)", p_diff)
+ vdiffr::expect_doppelganger("ppc_pit_ecdf_grouped (no diff)", g_diff)
+})