diff --git a/NAMESPACE b/NAMESPACE index 1717dd56..86683f2a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,6 +75,7 @@ export(mcmc_nuts_treedepth) export(mcmc_pairs) export(mcmc_parcoord) export(mcmc_parcoord_data) +export(mcmc_rank_ecdf) export(mcmc_rank_hist) export(mcmc_rank_overlay) export(mcmc_recover_hist) @@ -131,6 +132,8 @@ export(ppc_loo_pit_data) export(ppc_loo_pit_overlay) export(ppc_loo_pit_qq) export(ppc_loo_ribbon) +export(ppc_pit_ecdf) +export(ppc_pit_ecdf_grouped) export(ppc_ribbon) export(ppc_ribbon_data) export(ppc_ribbon_grouped) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 71c63e0e..8d0f4fed 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -98,6 +98,32 @@ validate_predictions <- function(predictions, n_obs = NULL) { } +#' Validate PIT +#' +#' Checks that `pit` is numeric, doesn't have any NAs, and is either a vector, +#' or a 1-D array with values in [0,1]. +#' +#' @param pit The 'pit' object from the user. +#' @return Either throws an error or returns a numeric vector. +#' @noRd +validate_pit <- function(pit) { + stopifnot(is.numeric(pit)) + + if (!is_vector_or_1Darray(pit)) { + abort("'pit' must be a vector or 1D array.") + } + + if (any(pit > 1) || any(pit < 0)) { + abort("'pit' must only contain values between 0 and 1.") + } + + if (anyNA(pit)) { + abort("NAs not allowed in 'pit'.") + } + + unname(pit) +} + #' Validate group #' #' Checks that grouping variable has correct number of observations and is @@ -267,6 +293,298 @@ melt_and_stack <- function(y, yrep) { } +#' Obtain the coverage parameter for simultaneous confidence bands for ECDFs +#' +#' @param N Length of sample. +#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc. +#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N. +#' @param prob Desired simultaneous coverage (0,1). +#' @param M number of simulations to run, if simulation method is used. +#' @param interpolate_adj Boolean defining whether to interpolate the confidence +#' bands from precomputed values. Interpolation provides a faster plot with the +#' trade-off of possible loss of accuracy. +#' @return The adjusted coverage parameter yielding the desired simultaneous +#' coverage of the ECDF traces. +#' @noRd +adjust_gamma <- function(N, + L = 1, + K = N, + prob = 0.99, + M = 1000, + interpolate_adj = FALSE) { + if (! all_counts(c(K, N, L))) { + abort("Parameters 'N', 'L' and 'K' must be positive integers.") + } + if (prob >= 1 || prob <= 0) { + abort("Value of 'prob' must be in (0,1).") + } + if (interpolate_adj == TRUE) { + gamma <- interpolate_gamma(N = N, K = K, prob = prob, L = L) + } else if (L == 1) { + gamma <- adjust_gamma_optimize(N = N, K = K, prob = prob) + } else { + gamma <- adjust_gamma_simulate(N = N, L = L, K = K, prob = prob, M = M) + } + gamma +} + +#' Adjust coverage parameter for single sample using the optimization method. +#' @param N Length of sample. +#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N. +#' @param prob Desired simultaneous coverage (0,1). +#' @return The adjusted coverage parameter yielding the desired simultaneous +#' coverage of the ECDF traces. +#' @noRd +adjust_gamma_optimize <- function(N, K, prob) { + target <- function(gamma, prob, N, K) { + z <- 1:(K - 1) / K + z1 <- c(0, z) + z2 <- c(z, 1) + + # pre-compute quantiles and use symmetry for increased efficiency. + x2_lower <- qbinom(gamma / 2, N, z2) + x2_upper <- c(N - rev(x2_lower)[2:K], 1) + + # Compute the total probability of trajectories inside the confidence + # intervals. Initialize the set and corresponding probabilities known + # to be 0 and 1 for the starting value z1 = 0. + x1 <- 0 + p_int <- 1 + for (i in seq_along(z1)) { + p_int <- p_interior( + p_int = p_int, + x1 = x1, + x2 = x2_lower[i]: x2_upper[i], + z1 = z1[i], + z2 = z2[i], + N = N + ) + x1 <- x2_lower[i]:x2_upper[i] + } + return(abs(prob - sum(p_int))) + } + optimize(target, c(0, 1 - prob), prob = prob, N = N, K = K)$minimum +} + +#' Adjust coverage parameter for multiple chains using the simulation method. +#' In short, 'M' simulations of 'L' standard uniform chains are run and the +#' confidence bands are set to cover 100 * 'prob' % of these simulations. +#' @param N Length of sample. +#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc. +#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N. +#' @param prob Desired simultaneous coverage (0,1). +#' @param M number of simulations to run. +#' @return The adjusted coverage parameter yielding the desired simultaneous +#' coverage of the ECDF traces. +#' @noRd +adjust_gamma_simulate <- function(N, L, K, prob, M) { + gamma <- numeric(M) + z <- (1:(K - 1)) / K # Rank ECDF evaluation points. + n <- N * (L - 1) + k <- floor(z * N * L) + for (m in seq_len(M)) { + u <- u_scale(replicate(L, runif(N))) # Fractional ranks of sample chains + scaled_ecdfs <- apply(outer(u, z, "<="), c(2, 3), sum) + # Find the smalles marginal probability of the simulation run + gamma[m] <- 2 * min( + apply( + scaled_ecdfs, 1, phyper, m = N, n = n, k = k + ), + apply( + scaled_ecdfs - 1, 1, phyper, m = N, n = n, k = k, lower.tail = FALSE + ) + ) + } + alpha_quantile(gamma, 1 - prob) +} + +#' Approximate the required adjustement to obtain simultaneous confidence bands +#' of an ECDF plot with interpolation with regards to N and K from precomputed +#' values for a fixed set of prob and L values. +#' @param N Length of sample. +#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc. +#' @param prob Desired simultaneous coverage (0,1). +#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N. +#' @return The approximated adjusted coverage parameter yielding the desired +#' simultaneous coverage of the ECDF traces. +#' @noRd +interpolate_gamma <- function(N, K, prob, L) { + # Find the precomputed values ueful for the interpolation task. + vals <- get_interpolation_values(N, K, L, prob) + # Largest lower bound and smalles upper bound for N among precomputed values. + N_lb <- max(vals[vals$N <= N, ]$N) + N_ub <- min(vals[vals$N >= N, ]$N) + # Approximate largest lower bound and smalles upper bound for gamma. + log_gamma_lb <- approx( + x = log(vals[vals$N == N_lb, ]$K), + y = log(vals[vals$N == N_lb, ]$val), + xout = log(K) + )$y + log_gamma_ub <- approx( + x = log(vals[vals$N == N_ub, ]$K), + y = log(vals[vals$N == N_ub, ]$val), + xout = log(K) + )$y + if (N_ub == N_lb) { + log_gamma_approx <- log_gamma_lb + } else { + # Approximate log_gamma for the desired value of N. + log_gamma_approx <- approx( + x = log(c(N_lb, N_ub)), + y = c(log_gamma_lb, log_gamma_ub), + xout = log(N) + )$y + } + exp(log_gamma_approx) +} + +#' Filter the precomputed values useful for the interpolation task given to +#' interpolate_gamma. Check, if the task is possible with the availabel data. +#' @param N Length of sample. +#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N. +#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc. +#' @param prob Desired simultaneous coverage (0,1). +#' @return A data.frame containing the relevant precomputed values. +#' @noRd +get_interpolation_values <- function(N, K, L, prob) { + for (dim in c("L", "prob")) { + if (all(get(dim) != bayesplot:::gamma_adj[, dim])) { + stop(paste( + "No precomputed values to interpolate from for '", dim, "' = ", + get(dim), + ".\n", + "Values of '", dim, "' available for interpolation: ", + paste(unique(bayesplot:::gamma_adj[, dim]), collapse = ", "), + ".", + sep = "" + )) + } + } + vals <- bayesplot:::gamma_adj[ + bayesplot:::gamma_adj$L == L & bayesplot:::gamma_adj$prob == prob, + ] + if (N > max(vals$N)) { + stop(paste( + "No precomputed values to interpolate from for sample length of ", + N, + ".\n", + "Please use a subsample of length ", + max(vals$N), + " or smaller, or consider setting 'interpolate_adj' = FALSE.", + sep = "" + )) + } + if (N < min(vals$N)) { + stop(paste( + "No precomputed values to interpolate from for sample length of ", + N, + ".\n", + "Please use a subsample of length ", + min(vals$N), + " or larger, or consider setting 'interpolate_adj' = FALSE.", + sep = "" + )) + } + if (K > max(vals[vals$N <= N, ]$K)) { + stop(paste( + "No precomputed values available for interpolation for 'K' = ", + K, + ".\n", + "Try either setting a value of 'K' <= ", + max(vals[vals$N <= N, ]$K), + "or 'interpolate_adj' = FALSE.", + sep = "" + )) + } + if (K < min(vals[vals$N <= N, ]$K)) { + stop(paste( + "No precomputed values available for interpolation for 'K' = ", + K, + ".\n", + "Try either setting a value of 'K' >= ", + min(vals[vals$N <= N, ]$K), + "or 'interpolate_adj' = FALSE.", + sep = "" + )) + } + vals +} + +#' A helper function for 'adjust_gamma_optimize' defining the probability that +#' a scaled ECDF stays within the supplied bounds between two evaluation points. +#' @param p_int For each value in x1, the probability that the ECDF has stayed +#' within the bounds until z1 and takes the value in x1 at z1. +#' @param x1 Vector of scaled ECDF values at the left end of the interval, z1. +#' @param x2 Vector of scaled ECDF values at the right end of the interval, z2. +#' @param z1 Left evaluation point in [0,1] +#' @param z2 Right evaluation point in [0,1] with z2 > z1. +#' @param N Total number of values in the sample. +#' @return A vector containing the probability to transitioning from the values +#' in x1 to each of the values in x2 weighted by the probabilities in p_int. +#' @noRd +p_interior <- function(p_int, x1, x2, z1, z2, N) { + # Ratio between the length of the evaluation interval and the total length of + # the interval left to cover by ECDF. + z_tilde <- (z2 - z1) / (1 - z1) + # Number of samples left to cover by ECDF. + N_tilde <- rep(N - x1, each = length(x2)) + + p_int <- rep(p_int, each = length(x2)) + x_diff <- outer(x2, x1, "-") + # Pobability of each transition from a value in x1 to a value in x2. + p_x2_int <- p_int * dbinom(x_diff, N_tilde, z_tilde) + rowSums(p_x2_int) +} + +#' A helper function for 'adjust_alpha_simulate' +#' 100 * `alpha` percent of the trials in 'gamma' are allowed to be rejected. +#' In case of ties, return the largest value dominating at most +#' 100 * (alpha + tol) percent of the values. +#' @noRd +alpha_quantile <- function(gamma, alpha, tol = 0.001) { + a <- unname(quantile(gamma, probs = alpha)) + a_tol <- unname(quantile(gamma, probs = alpha + tol)) + if (a == a_tol) { + if (min(gamma) < a) { + # take the largest value that doesn't exceed the tolerance. + a <- max(gamma[gamma < a]) + } + } + a +} + +#' Compute simultaneous confidence intervals with the given adjusted coverage +#' parameter gamma. +#' @param gamma Adjusted coverage parameter for the marginal distribution +#' (binomial for PIT values and hypergeometric for rank transformed chains). +#' @param N Sample length. +#' @param K Number of uniformly spaced evaluation points. +#' @param L Number of MCMC-chains. (1 for ppc) +#' @return A list with upper and lower confidence interval values at the +#' evaluation points. +#' @noRd +ecdf_intervals <- function(gamma, N, K, L = 1) { + lims <- list() + z <- seq(0, 1, length.out = K + 1) + if (L == 1) { + lims$lower <- qbinom(gamma / 2, N, z) + lims$upper <- qbinom(1 - gamma / 2, N, z) + } else { + n <- N * (L - 1) + k <- floor(z * L * N) + lims$lower <- qhyper(gamma / 2, N, n, k) + lims$upper <- qhyper(1 - gamma / 2, N, n, k) + } + lims +} + +#' Helper for 'adjust_gamma_simulate` +#' Transforms observations in 'x' into their corresponding fractional ranks. +#' @noRd +u_scale <- function(x) { + array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x)) +} + # labels ---------------------------------------------------------------- create_rep_ids <- function(ids) paste('italic(y)[rep] (', ids, ")") y_label <- function() expression(italic(y)) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 285c7b37..ce65db93 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -10,6 +10,7 @@ #' @template args-regex_pars #' @template args-transformations #' @template args-facet_args +#' @template args-pit-ecdf #' @param ... Currently ignored. #' @param size An optional value to override the default line size #' for `mcmc_trace()` or the default point size for `mcmc_trace_highlight()`. @@ -63,9 +64,20 @@ #' Ranks from `mcmc_rank_hist()` are plotted using overlaid lines in a #' single panel. #' } +#' \item{`mcmc_rank_ecdf()`}{ +#' The ECDFs of the ranks from `mcmc_rank_hist()` are plotted with the +#' simultaneous confidence bands with a coverage determined by `prob`, +#' that is, bands that completely cover all of the rank ECDFs with the +#' probability 'prob'. +#' By default, the difference between the observed rank ECDFs and the +#' theoretical expectation for samples originating from the same distribution +#' is drawn. +#' See Säilynoja et al. (2021) for details. +#' } #' } #' #' @template reference-improved-rhat +#' @template reference-uniformity-test #' @examples #' # some parameter draws to use for demonstration #' x <- example_mcmc_draws(chains = 4, params = 6) @@ -105,6 +117,11 @@ #' mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE) #' mcmc_rank_overlay(x, "alpha") #' +#' # ECDF difference plots of the ranking of MCMC samples between chains. +#' # Provide 99% simultaneous confidence intervals for the chains sampling from +#' # the same distribution. +#' mcmc_rank_ecdf(x, prob = 0.99) +#' #' \dontrun{ #' # parse facet label text #' color_scheme_set("purple") @@ -421,6 +438,112 @@ mcmc_rank_hist <- function(x, labs(x = "Rank") } +#' @rdname MCMC-traces +#' @param prob For `mcmc_rank_ecdf()`, a value between 0 and 1 +#' specifying the desired simultaneous confidence of the confidence bands to be +#' drawn for the rank ECDF plots. +#' @param plot_diff For `mcmc_rank_ecdf()`, a boolean specifying it the +#' difference between the observed rank ECDFs and the theoretical expectation +#' should be drawn instead of the unmodified rank ECDF plots. +#' @export +mcmc_rank_ecdf <- + function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + ..., + K = NULL, + facet_args = list(), + prob = 0.99, + plot_diff = TRUE, + interpolate_adj = TRUE) { + check_ignored_arguments(..., + ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj", "M") + ) + data <- mcmc_trace_data( + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations, + highlight = 1 + ) + n_iter <- unique(data$n_iterations) + n_chain <- unique(data$n_chains) + n_param <- unique(data$n_parameters) + + x <- if (is.null(K)) { + 0:n_iter / n_iter + } else { + 0:K / K + } + gamma <- adjust_gamma( + N = n_iter, + L = n_chain, + K = if (is.null(K)) { + n_iter + } else { + K + }, + prob = prob, + interpolate_adj = interpolate_adj, + ... + ) + lims <- ecdf_intervals( + gamma = gamma, + N = n_iter, + K = if (is.null(K)) { + n_iter + } else { + K + }, + L <- n_chain + ) + data_lim <- data.frame( + upper = lims$upper / n_iter - (plot_diff == TRUE) * x, + lower = lims$lower / n_iter - (plot_diff == TRUE) * x, + x = x + ) + data <- data %>% + group_by(parameter, chain) %>% + dplyr::group_map(~ data.frame( + parameter = .y[1], + chain = .y[2], + ecdf_value = ecdf(.x$value_rank / (n_iter * n_chain))(x) - + (plot_diff == TRUE) * x, + x = x + )) %>% + dplyr::bind_rows() + + mapping <- aes_( + x = ~x, + y = ~ecdf_value, + color = ~chain, + group = ~chain + ) + + scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain)) + + facet_call <- NULL + if (n_param == 1) { + facet_call <- ylab(levels(data$parameter)) + } else { + facet_args$facets <- ~parameter + facet_args$scales <- facet_args$scales %||% "free" + facet_call <- do.call("facet_wrap", facet_args) + } + + ggplot() + + geom_step(data = data_lim, aes_(x = ~x, y = ~upper), show.legend = FALSE) + + geom_step(data = data_lim, aes_(x = ~x, y = ~lower), show.legend = FALSE) + + geom_step(mapping, data) + + bayesplot_theme_get() + + scale_color + + facet_call + + scale_x_continuous(breaks = pretty) + + legend_move(ifelse(n_chain > 1, "right", "none")) + + xaxis_title(FALSE) + + yaxis_title(on = n_param == 1) +} #' @rdname MCMC-traces #' @export @@ -604,7 +727,6 @@ mcmc_trace_data <- function(x, yaxis_title(on = n_param == 1) } - chain_colors <- function(n) { all_clrs <- unlist(color_scheme_get()) clrs <- switch( diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index cadb62f1..237586b2 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -12,6 +12,7 @@ #' @template args-hist #' @template args-hist-freq #' @template args-dens +#' @template args-pit-ecdf #' @param size,alpha Passed to the appropriate geom to control the appearance of #' the predictive distributions. #' @param ... Currently unused. @@ -47,9 +48,16 @@ #' quantiles. `y` is overlaid on the plot either as a violin, points, or #' both, depending on the `y_draw` argument. #' } +#' \item{`ppc_pit_ecdf()`}{ +#' The ECDF of the empirical PIT values of `y` computed with respect to the +#' corresponding `yrep` values. `100 * prob`% central simultaneous confidence +#' intervals are provided to asses if ´y´ and ´yrep´ originate from the same +#' distribution. The PIT values can also be provided directly as `pit`. +#' See Säilynoja et al. (2021) for more details.} #' } #' #' @template reference-vis-paper +#' @template reference-uniformity-test #' @templateVar bdaRef (Ch. 6) #' @template reference-bda #' @@ -59,16 +67,19 @@ #' yrep <- example_yrep_draws() #' dim(yrep) #' ppc_dens_overlay(y, yrep[1:25, ]) -#' #' \donttest{ #' # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data) #' ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ]) #' } +#' # ECDF and ECDF difference plot of the PIT values of ´y´ compared to ´yrep +#' # with 99% simultaneous confidence bands. +#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE, interpolate_adj = FALSE) +#' ppc_pit_ecdf(y, yrep, prob = 0.99, interpolate_adj = FALSE) +#' #' #' # for ppc_hist,dens,freqpoly,boxplot definitely use a subset yrep rows so #' # only a few (instead of nrow(yrep)) histograms are plotted #' ppc_hist(y, yrep[1:8, ]) -#' #' \donttest{ #' color_scheme_set("red") #' ppc_boxplot(y, yrep[1:8, ]) @@ -79,13 +90,13 @@ #' } #' #' # frequency polygons -#' ppc_freqpoly(y, yrep[1:3,], alpha = 0.1, size = 1, binwidth = 5) +#' ppc_freqpoly(y, yrep[1:3, ], alpha = 0.1, size = 1, binwidth = 5) #' #' group <- example_group_data() -#' ppc_freqpoly_grouped(y, yrep[1:3,], group) + yaxis_text() +#' ppc_freqpoly_grouped(y, yrep[1:3, ], group) + yaxis_text() #' \donttest{ #' # if groups are different sizes then the 'freq' argument can be useful -#' ppc_freqpoly_grouped(y, yrep[1:3,], group, freq = FALSE) + yaxis_text() +#' ppc_freqpoly_grouped(y, yrep[1:3, ], group, freq = FALSE) + yaxis_text() #' } #' #' # density and distribution overlays by group @@ -93,6 +104,10 @@ #' #' ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group) #' +#' # ECDF difference plots of the PIT values by group +#' # with 99% simultaneous confidence bands. +#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, interpolate_adj=FALSE) +#' #' # don't need to only use small number of rows for ppc_violin_grouped #' # (as it pools yrep draws within groups) #' color_scheme_set("gray") @@ -102,8 +117,10 @@ #' #' # change how y is drawn #' ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "points", y_size = 1.5) -#' ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "both", -#' y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33) +#' ppc_violin_grouped(y, yrep, group, +#' alpha = 0, y_draw = "both", +#' y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33 +#' ) #' } NULL @@ -139,9 +156,9 @@ ppc_dens_overlay <- check_ignored_arguments(...) data <- ppc_data(y, yrep) - ggplot(data, mapping = aes_(x = ~ value)) + + ggplot(data, mapping = aes_(x = ~value)) + overlay_ppd_densities( - mapping = aes_(group = ~ rep_id, color = "yrep"), + mapping = aes_(group = ~rep_id, color = "yrep"), data = function(x) dplyr::filter(x, !.data$is_y), size = size, alpha = alpha, @@ -175,19 +192,17 @@ ppc_dens_overlay <- #' @rdname PPC-distributions #' @export #' @template args-density-controls -ppc_dens_overlay_grouped <- function( - y, - yrep, - group, - ..., - size = 0.25, - alpha = 0.7, - trim = FALSE, - bw = "nrd0", - adjust = 1, - kernel = "gaussian", - n_dens = 1024 -) { +ppc_dens_overlay_grouped <- function(y, + yrep, + group, + ..., + size = 0.25, + alpha = 0.7, + trim = FALSE, + bw = "nrd0", + adjust = 1, + kernel = "gaussian", + n_dens = 1024) { check_ignored_arguments(...) p_overlay <- ppc_dens_overlay( @@ -221,20 +236,18 @@ ppc_dens_overlay_grouped <- function( #' `TRUE` then `geom="step"` is used. #' @param pad A logical scalar passed to [ggplot2::stat_ecdf()]. #' -ppc_ecdf_overlay <- function( - y, - yrep, - ..., - discrete = FALSE, - pad = TRUE, - size = 0.25, - alpha = 0.7 -) { +ppc_ecdf_overlay <- function(y, + yrep, + ..., + discrete = FALSE, + pad = TRUE, + size = 0.25, + alpha = 0.7) { check_ignored_arguments(...) data <- ppc_data(y, yrep) ggplot(data) + - aes_(x = ~ value) + + aes_(x = ~value) + hline_at( 0.5, size = 0.1, @@ -249,7 +262,7 @@ ppc_ecdf_overlay <- function( ) + stat_ecdf( data = function(x) dplyr::filter(x, !.data$is_y), - mapping = aes_(group = ~ rep_id, color = "yrep"), + mapping = aes_(group = ~rep_id, color = "yrep"), geom = if (discrete) "step" else "line", size = size, alpha = alpha, @@ -262,25 +275,23 @@ ppc_ecdf_overlay <- function( size = 1, pad = pad ) + - scale_color_ppc() + - scale_y_continuous(breaks = c(0, 0.5, 1)) + - bayesplot_theme_get() + - yaxis_title(FALSE) + - xaxis_title(FALSE) - } + scale_color_ppc() + + scale_y_continuous(breaks = c(0, 0.5, 1)) + + bayesplot_theme_get() + + yaxis_title(FALSE) + + xaxis_title(FALSE) +} #' @export #' @rdname PPC-distributions -ppc_ecdf_overlay_grouped <- function( - y, - yrep, - group, - ..., - discrete = FALSE, - pad = TRUE, - size = 0.25, - alpha = 0.7 -) { +ppc_ecdf_overlay_grouped <- function(y, + yrep, + group, + ..., + discrete = FALSE, + pad = TRUE, + size = 0.25, + alpha = 0.7) { check_ignored_arguments(...) p_overlay <- ppc_ecdf_overlay( @@ -315,9 +326,9 @@ ppc_dens <- check_ignored_arguments(...) data <- ppc_data(y, yrep) ggplot(data, mapping = aes_( - x = ~ value, - fill = ~ is_y_label, - color = ~ is_y_label + x = ~value, + fill = ~is_y_label, + color = ~is_y_label )) + geom_density( size = size, @@ -354,8 +365,8 @@ ppc_hist <- data <- ppc_data(y, yrep) ggplot(data, mapping = set_hist_aes( freq = freq, - fill = ~ is_y_label, - color = ~ is_y_label + fill = ~is_y_label, + color = ~is_y_label )) + geom_histogram( size = 0.25, @@ -388,7 +399,6 @@ ppc_freqpoly <- freq = TRUE, size = 0.5, alpha = 1) { - dots <- list(...) if (!from_grouped(dots)) { check_ignored_arguments(...) @@ -398,8 +408,8 @@ ppc_freqpoly <- data <- ppc_data(y, yrep, group = dots$group) ggplot(data, mapping = set_hist_aes( freq = freq, - fill = ~ is_y_label, - color = ~ is_y_label + fill = ~is_y_label, + color = ~is_y_label )) + geom_area( stat = "bin", @@ -465,19 +475,19 @@ ppc_boxplot <- data <- ppc_data(y, yrep) ggplot(data, mapping = aes_( - x = ~ rep_label, - y = ~ value, - fill = ~ is_y_label, - color = ~ is_y_label + x = ~rep_label, + y = ~value, + fill = ~is_y_label, + color = ~is_y_label )) + geom_boxplot( notch = notch, size = size, alpha = alpha, - outlier.alpha = 2/3, + outlier.alpha = 2 / 3, outlier.size = 1 ) + - scale_x_discrete(labels = function(x) parse(text=x)) + + scale_x_discrete(labels = function(x) parse(text = x)) + scale_fill_ppc() + scale_color_ppc() + bayesplot_theme_get() + @@ -520,7 +530,7 @@ ppc_violin_grouped <- y_points <- y_draw %in% c("points", "both") args_violin_yrep <- list( - data = function(x) dplyr::filter(x,!.data$is_y), + data = function(x) dplyr::filter(x, !.data$is_y), aes_(fill = "yrep", color = "yrep"), draw_quantiles = probs, alpha = alpha, @@ -553,7 +563,8 @@ ppc_violin_grouped <- layer_jitter_y <- do.call(jitter_y_func, args_jitter_y) data <- ppc_data(y, yrep, group) - ggplot(data, mapping = aes_(x = ~ group, y = ~ value)) + + + ggplot(data, mapping = aes_(x = ~group, y = ~value)) + layer_violin_yrep + layer_violin_y + layer_jitter_y + @@ -563,3 +574,157 @@ ppc_violin_grouped <- xaxis_title(FALSE) + bayesplot_theme_get() } + + +#' @export +#' @param pit An optional vector of probability integral transformed values for +#' which the ECDF is to be drawn. If NULL, PIT values are computed to ´y´ with +#' respect to the corresponding values in ´yrep´. +#' @rdname PPC-distributions +#' +ppc_pit_ecdf <- function(y, + yrep, + ..., + pit = NULL, + K = NULL, + prob = .99, + plot_diff = TRUE, + interpolate_adj = TRUE) { + check_ignored_arguments(..., + ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj") + ) + + if (is.null(pit)) { + pit <- ppc_data(y, yrep) %>% + group_by(y_id) %>% + dplyr::group_map(~ mean(.x$value[.x$is_y] >= .x$value[!.x$is_y])) %>% + unlist() + if (is.null(K)) { + K <- min( + length(unique(ppc_data(y, yrep)$rep_id)) + 1, + length(pit) + ) + } + } else { + inform("'pit' specified so ignoring 'y', and 'yrep' if specified.") + pit <- validate_pit(pit) + if (is.null(K)) { + K <- length(pit) + } + } + N <- length(pit) + gamma <- adjust_gamma( + N = N, + K = K, + prob = prob, + interpolate_adj = interpolate_adj + ) + lims <- ecdf_intervals(gamma = gamma, N = N, K = K) + ggplot() + + aes_( + x = 1:K / K, + y = ecdf(pit)(seq(0, 1, length.out = K)) - (plot_diff == TRUE) * 1:K / K, + color = "y" + ) + + geom_step(show.legend = FALSE) + + geom_step(aes( + y = lims$upper[-1] / N - (plot_diff == TRUE) * 1:K / K, + color = "yrep" + ), show.legend = FALSE) + + geom_step(aes( + y = lims$lower[-1] / N - (plot_diff == TRUE) * 1:K / K, + color = "yrep" + ), show.legend = FALSE) + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + scale_color_ppc() + + bayesplot_theme_get() +} + +#' @export +#' @rdname PPC-distributions +#' +ppc_pit_ecdf_grouped <- + function(y, + yrep, + group, + ..., + K = NULL, + pit = NULL, + prob = .99, + plot_diff = TRUE, + interpolate_adj = TRUE) { + check_ignored_arguments(..., + ok_args = c("K", "pit", "prob", "plot_diff", "interpolate_adj") + ) + + if (is.null(pit)) { + pit <- ppc_data(y, yrep, group) %>% + group_by(y_id) %>% + dplyr::group_map(~ mean(.x$value[.x$is_y] >= .x$value[!.x$is_y])) %>% + unlist() + if (is.null(K)) { + K <- length(unique(ppc_data(y, yrep)$rep_id)) + 1 + } + } else { + inform("'pit' specified so ignoring 'y' and 'yrep' if specified.") + pit <- validate_pit(pit) + } + N <- length(pit) + + gammas <- lapply(unique(group), function(g) { + N_g <- sum(group == g) + adjust_gamma( + N = N_g, + K = min(N_g, K), + prob = prob, + interpolate_adj = interpolate_adj + ) + }) + names(gammas) <- unique(group) + + data <- data.frame(pit = pit, group = group) %>% + group_by(group) %>% + dplyr::group_map(~ data.frame( + ecdf_value = ecdf(.x$pit)( + seq(0, 1, length.out = min(nrow(.x), K))), + group = .y[1], + lims_upper = ecdf_intervals( + gamma = gammas[[unlist(.y[1])]], + N = nrow(.x), + K = min(nrow(.x), K) + )$upper[-1] / nrow(.x), + lims_lower = ecdf_intervals( + gamma = gammas[[unlist(.y[1])]], + N = nrow(.x), + K = min(nrow(.x), K) + )$lower[-1] / nrow(.x), + x = seq(0, 1, length.out = min(nrow(.x), K)) + )) %>% + dplyr::bind_rows() + + ggplot(data) + + aes_( + x = ~x, + y = ~ ecdf_value - (plot_diff == TRUE) * x, + group = ~group, + color = "y" + ) + + geom_step(show.legend = FALSE) + + geom_step(aes_( + y = ~ lims_upper - (plot_diff == TRUE) * x, + color = "yrep" + ), show.legend = FALSE) + + geom_step(aes_( + y = ~ lims_lower - (plot_diff == TRUE) * x, + color = "yrep" + ), show.legend = FALSE) + + xaxis_title(FALSE) + + yaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() + + facet_wrap("group") + + scale_color_ppc() + + force_axes_in_facets() + } diff --git a/R/sysdata.rda b/R/sysdata.rda index 7213031e..6acb55b3 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/man-roxygen/args-pit-ecdf.R b/man-roxygen/args-pit-ecdf.R new file mode 100644 index 00000000..587deac6 --- /dev/null +++ b/man-roxygen/args-pit-ecdf.R @@ -0,0 +1,12 @@ +#' @param K An optional integer defining the number of equally spaced evaluation +#' points for the ECDF. If the submitted PIT values are known to be discrete, +#' this should be the number of the discrete cases. Defaults to the smaller of +#' ´length(y)´ and ´ncol(yrep)´ when applicable. +#' @param prob The desired simultaneous coverage level of the bands around the +#' ECDF. A value in (0,1). +#' @param plot_diff A boolean defining wether to plot the difference between the +#' observed ECDF and the theoretical expectation for uniform PIT values. +#' @param interpolate_adj A boolean defining if the simultaneous confidence +#' bands should be computed or interpolated from precomputed values. Computing +#' the bands may be computationally intensive and approximation gives a fast +#' method for assessing the ECDF trajectory. diff --git a/man-roxygen/reference-uniformity-test.R b/man-roxygen/reference-uniformity-test.R new file mode 100644 index 00000000..c18e209a --- /dev/null +++ b/man-roxygen/reference-uniformity-test.R @@ -0,0 +1,4 @@ +#' @references Säilynoja, T., Bürkner, P., Vehtari, A. +#' (2021). Graphical Test for Discrete Uniformity and its Applications in +#' Goodness of Fit Evaluation and Multiple Sample Comparison [arXiv +#' preprint](https://arxiv.org/abs/2103.10522). diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 68d94198..e1cc67e9 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -7,6 +7,7 @@ \alias{trace_style_np} \alias{mcmc_rank_overlay} \alias{mcmc_rank_hist} +\alias{mcmc_rank_ecdf} \alias{mcmc_trace_data} \title{Trace and rank plots of MCMC draws} \usage{ @@ -64,6 +65,19 @@ mcmc_rank_hist( ref_line = FALSE ) +mcmc_rank_ecdf( + x, + pars = character(), + regex_pars = character(), + transformations = list(), + ..., + K = NULL, + facet_args = list(), + prob = 0.99, + plot_diff = TRUE, + interpolate_adj = TRUE +) + mcmc_trace_data( x, pars = character(), @@ -178,6 +192,24 @@ of rank-normalized MCMC samples. Defaults to \code{20}.} \item{ref_line}{For the rank plots, whether to draw a horizontal line at the average number of ranks per bin. Defaults to \code{FALSE}.} + +\item{K}{An optional integer defining the number of equally spaced evaluation +points for the ECDF. If the submitted PIT values are known to be discrete, +this should be the number of the discrete cases. Defaults to the smaller of +´length(y)´ and ´ncol(yrep)´ when applicable.} + +\item{prob}{For \code{mcmc_rank_ecdf()}, a value between 0 and 1 +specifying the desired simultaneous confidence of the confidence bands to be +drawn for the rank ECDF plots.} + +\item{plot_diff}{For \code{mcmc_rank_ecdf()}, a boolean specifying it the +difference between the observed rank ECDFs and the theoretical expectation +should be drawn instead of the unmodified rank ECDF plots.} + +\item{interpolate_adj}{A boolean defining if the simultaneous confidence +bands should be computed or interpolated from precomputed values. Computing +the bands may be computationally intensive and approximation gives a fast +method for assessing the ECDF trajectory.} } \value{ The plotting functions return a ggplot object that can be further @@ -214,6 +246,16 @@ See Vehtari et al. (2019) for details. Ranks from \code{mcmc_rank_hist()} are plotted using overlaid lines in a single panel. } +\item{\code{mcmc_rank_ecdf()}}{ +The ECDFs of the ranks from \code{mcmc_rank_hist()} are plotted with the +simultaneous confidence bands with a coverage determined by \code{prob}, +that is, bands that completely cover all of the rank ECDFs with the +probability 'prob'. +By default, the difference between the observed rank ECDFs and the +theoretical expectation for samples originating from the same distribution +is drawn. +See Säilynoja et al. (2021) for details. +} } } @@ -256,6 +298,11 @@ mcmc_rank_hist(x, "alpha") mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE) mcmc_rank_overlay(x, "alpha") +# ECDF difference plots of the ranking of MCMC samples between chains. +# Provide 99\% simultaneous confidence intervals for the chains sampling from +# the same distribution. +mcmc_rank_ecdf(x, prob = 0.99) + \dontrun{ # parse facet label text color_scheme_set("purple") @@ -302,6 +349,10 @@ mcmc_trace( Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., Bürkner, P. (2019). Rank-normalization, folding, and localization: An improved \emph{R}-hat for assessing convergence of MCMC. \href{https://arxiv.org/abs/1903.08008}{arXiv preprint}. + +Säilynoja, T., Bürkner, P., Vehtari, A. +(2021). Graphical Test for Discrete Uniformity and its Applications in +Goodness of Fit Evaluation and Multiple Sample Comparison \href{https://arxiv.org/abs/2103.10522}{arXiv preprint}. } \seealso{ Other MCMC: diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index 43660409..c197956a 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -13,6 +13,8 @@ \alias{ppc_freqpoly_grouped} \alias{ppc_boxplot} \alias{ppc_violin_grouped} +\alias{ppc_pit_ecdf} +\alias{ppc_pit_ecdf_grouped} \title{PPC distributions} \usage{ ppc_data(y, yrep, group = NULL) @@ -97,6 +99,29 @@ ppc_violin_grouped( y_alpha = 1, y_jitter = 0.1 ) + +ppc_pit_ecdf( + y, + yrep, + ..., + pit = NULL, + K = NULL, + prob = 0.99, + plot_diff = TRUE, + interpolate_adj = TRUE +) + +ppc_pit_ecdf_grouped( + y, + yrep, + group, + ..., + K = NULL, + pit = NULL, + prob = 0.99, + plot_diff = TRUE, + interpolate_adj = TRUE +) } \arguments{ \item{y}{A vector of observations. See \strong{Details}.} @@ -160,6 +185,26 @@ horizontal lines. Set to \code{NULL} to remove the lines.} to to the \code{size}, \code{alpha}, and \code{width} arguments of \code{\link[ggplot2:geom_jitter]{ggplot2::geom_jitter()}} to control the appearance of \code{y} points. The default of \code{y_jitter=NULL} will let \strong{ggplot2} determine the amount of jitter.} + +\item{pit}{An optional vector of probability integral transformed values for +which the ECDF is to be drawn. If NULL, PIT values are computed to ´y´ with +respect to the corresponding values in ´yrep´.} + +\item{K}{An optional integer defining the number of equally spaced evaluation +points for the ECDF. If the submitted PIT values are known to be discrete, +this should be the number of the discrete cases. Defaults to the smaller of +´length(y)´ and ´ncol(yrep)´ when applicable.} + +\item{prob}{The desired simultaneous coverage level of the bands around the +ECDF. A value in (0,1).} + +\item{plot_diff}{A boolean defining wether to plot the difference between the +observed ECDF and the theoretical expectation for uniform PIT values.} + +\item{interpolate_adj}{A boolean defining if the simultaneous confidence +bands should be computed or interpolated from precomputed values. Computing +the bands may be computationally intensive and approximation gives a fast +method for assessing the ECDF trajectory.} } \value{ The plotting functions return a ggplot object that can be further @@ -205,6 +250,12 @@ variable is plotted as a violin with horizontal lines at notable quantiles. \code{y} is overlaid on the plot either as a violin, points, or both, depending on the \code{y_draw} argument. } +\item{\code{ppc_pit_ecdf()}}{ +The ECDF of the empirical PIT values of \code{y} computed with respect to the +corresponding \code{yrep} values. \code{100 * prob}\% central simultaneous confidence +intervals are provided to asses if ´y´ and ´yrep´ originate from the same +distribution. The PIT values can also be provided directly as \code{pit}. +See Säilynoja et al. (2021) for more details.} } } @@ -214,16 +265,19 @@ y <- example_y_data() yrep <- example_yrep_draws() dim(yrep) ppc_dens_overlay(y, yrep[1:25, ]) - \donttest{ # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data) ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ]) } +# ECDF and ECDF difference plot of the PIT values of ´y´ compared to ´yrep +# with 99\% simultaneous confidence bands. +ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE, interpolate_adj = FALSE) +ppc_pit_ecdf(y, yrep, prob = 0.99, interpolate_adj = FALSE) + # for ppc_hist,dens,freqpoly,boxplot definitely use a subset yrep rows so # only a few (instead of nrow(yrep)) histograms are plotted ppc_hist(y, yrep[1:8, ]) - \donttest{ color_scheme_set("red") ppc_boxplot(y, yrep[1:8, ]) @@ -234,13 +288,13 @@ ppc_dens(y, yrep[200:202, ]) } # frequency polygons -ppc_freqpoly(y, yrep[1:3,], alpha = 0.1, size = 1, binwidth = 5) +ppc_freqpoly(y, yrep[1:3, ], alpha = 0.1, size = 1, binwidth = 5) group <- example_group_data() -ppc_freqpoly_grouped(y, yrep[1:3,], group) + yaxis_text() +ppc_freqpoly_grouped(y, yrep[1:3, ], group) + yaxis_text() \donttest{ # if groups are different sizes then the 'freq' argument can be useful -ppc_freqpoly_grouped(y, yrep[1:3,], group, freq = FALSE) + yaxis_text() +ppc_freqpoly_grouped(y, yrep[1:3, ], group, freq = FALSE) + yaxis_text() } # density and distribution overlays by group @@ -248,6 +302,10 @@ ppc_dens_overlay_grouped(y, yrep[1:25, ], group = group) ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group) +# ECDF difference plots of the PIT values by group +# with 99\% simultaneous confidence bands. +ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, interpolate_adj=FALSE) + # don't need to only use small number of rows for ppc_violin_grouped # (as it pools yrep draws within groups) color_scheme_set("gray") @@ -257,8 +315,10 @@ ppc_violin_grouped(y, yrep, group, alpha = 0) # change how y is drawn ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "points", y_size = 1.5) -ppc_violin_grouped(y, yrep, group, alpha = 0, y_draw = "both", - y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33) +ppc_violin_grouped(y, yrep, group, + alpha = 0, y_draw = "both", + y_size = 1.5, y_alpha = 0.5, y_jitter = 0.33 +) } } \references{ @@ -269,6 +329,10 @@ Gelman, A. (2019), Visualization in Bayesian workflow. \href{https://arxiv.org/abs/1709.01449}{arXiv preprint}, \href{https://github.com/jgabry/bayes-vis-paper}{code on GitHub}) +Säilynoja, T., Bürkner, P., Vehtari, A. +(2021). Graphical Test for Discrete Uniformity and its Applications in +Goodness of Fit Evaluation and Multiple Sample Comparison \href{https://arxiv.org/abs/2103.10522}{arXiv preprint}. + Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. (Ch. 6) diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg new file mode 100644 index 00000000..dc312b96 --- /dev/null +++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-default.svg @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + +V2 + + + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 + +-0.08 +-0.04 +0.00 +0.04 +0.08 + + + + + + +-0.08 +-0.04 +0.00 +0.04 +0.08 + + + + + +Chain + + + + +1 +2 +3 +4 +mcmc_rank_ecdf (default) + + diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg new file mode 100644 index 00000000..8c58ac31 --- /dev/null +++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-no-diff.svg @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + +V2 + + + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + +Chain + + + + +1 +2 +3 +4 +mcmc_rank_ecdf (no diff) + + diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg new file mode 100644 index 00000000..f5bc0c0f --- /dev/null +++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-param-no-diff.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 +V1 +Chain + + + + +1 +2 +3 +4 +mcmc_rank_ecdf (one param no diff) + + diff --git a/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg new file mode 100644 index 00000000..d41432e1 --- /dev/null +++ b/tests/testthat/_snaps/mcmc-traces/mcmc-rank-ecdf-one-parameter.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-0.08 +-0.04 +0.00 +0.04 +0.08 + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 +0.8 +1.0 +V1 +Chain + + + + +1 +2 +3 +4 +mcmc_rank_ecdf (one parameter) + + diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg new file mode 100644 index 00000000..d82c3428 --- /dev/null +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +-0.1 +0.0 +0.1 + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +ppc_pit_ecdf (default) + + diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg new file mode 100644 index 00000000..261e6496 --- /dev/null +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-default.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +C + + + + + + + + + +D + + + + + + + + + +A + + + + + + + + + +B + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + +-0.2 +0.0 +0.2 + + + + +-0.2 +0.0 +0.2 + + + +ppc_pit_ecdf_grouped (default) + + diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg new file mode 100644 index 00000000..eb737933 --- /dev/null +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-grouped-no-diff.svg @@ -0,0 +1,163 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +C + + + + + + + + + +D + + + + + + + + + +A + + + + + + + + + +B + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + +ppc_pit_ecdf_grouped (no diff) + + diff --git a/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg new file mode 100644 index 00000000..258e9f55 --- /dev/null +++ b/tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-no-diff.svg @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +ppc_pit_ecdf (no diff) + + diff --git a/tests/testthat/test-helpers-ppc.R b/tests/testthat/test-helpers-ppc.R index 032b65c2..f1e740cb 100644 --- a/tests/testthat/test-helpers-ppc.R +++ b/tests/testthat/test-helpers-ppc.R @@ -2,6 +2,7 @@ library(bayesplot) context("PPC: misc. functions") source(test_path("data-for-ppc-tests.R")) +source(test_path("data-for-mcmc-tests.R")) # melt_predictions --------------------------------------------------------------- expect_molten_yrep <- function(yrep) { @@ -54,6 +55,7 @@ test_that("is_whole_number works correctly", { c(rep(TRUE, 3), FALSE)) expect_true(!is_whole_number("1")) }) + test_that("all_counts works correctly", { expect_true(all_counts(1)) expect_true(all_counts(0:5)) @@ -64,3 +66,55 @@ test_that("all_counts works correctly", { expect_false(all_counts(c(-1, 2))) }) +# adjust_gamma + +test_that("adjust_gamma works with different adjustment methods", { + set.seed(8420) + + expect_equal( + adjust_gamma(N = 100, K = 100, L = 1, prob = .99), + adjust_gamma(N = 100, K = 100, L = 1, prob = .99, interpolate_adj = TRUE), + tolerance = 1e-3 + ) + + expect_equal( + adjust_gamma(N = 100, K = 100, L = 4, prob = .99, M = 1000), + adjust_gamma(N = 100, K = 100, L = 4, prob = .99, interpolate_adj = TRUE), + tolerance = 1e-3 + ) + + set.seed(NULL) +}) + + +# get_interpolation_values ------------------------------------------------ +test_that("get_interpolation_values catches impossible values", { + expect_error( + get_interpolation_values(1000, 1000, 0, .5), + "No precomputed values to interpolate from for 'L' = 0." + ) + expect_error( + get_interpolation_values(1000, 1000, 4, 0), + "No precomputed values to interpolate from for 'prob' = 0." + ) + expect_error( + get_interpolation_values(1000, 0, 4, .95), + "No precomputed values available for interpolation for 'K' = 0." + ) + expect_error( + get_interpolation_values(0, 1000, 4, .95), + "No precomputed values to interpolate from for sample length of 0." + ) +}) + +# ecdf_intervals --------------------------------------------------------- +test_that("ecdf_intervals returns right dimensions and values", { + lims <- ecdf_intervals(.0001, N = 100, K = 100, L = 1) + expect_named(lims, c("lower", "upper")) + expect_length(lims$upper, 101) + expect_length(lims$lower, 101) + expect_equal(min(lims$upper), 0) + expect_equal(max(lims$upper), 100) + expect_equal(min(lims$lower), 0) + expect_equal(max(lims$lower), 100) +}) diff --git a/tests/testthat/test-mcmc-traces.R b/tests/testthat/test-mcmc-traces.R index 82dcec1e..34489c9e 100644 --- a/tests/testthat/test-mcmc-traces.R +++ b/tests/testthat/test-mcmc-traces.R @@ -33,6 +33,17 @@ test_that("mcmc_trace_highlight throws error if highlight > number of chains", { expect_error(mcmc_trace_highlight(arr, pars = "sigma", highlight = 7), "'highlight' is 7") }) +test_that("mcmc_rank_ecdf returns a ggplot object", { + expect_gg(mcmc_rank_ecdf(arr, regex_pars = c("beta", "x\\:"))) + expect_gg(mcmc_rank_ecdf(dframe_multiple_chains, interpolate_adj = FALSE)) +}) + +test_that("mcmc_rank_ecdf throws error if 1 chain but multiple chains required", { + expect_error(mcmc_rank_ecdf(mat), "requires multiple chains") + expect_error(mcmc_rank_ecdf(dframe), "requires multiple chains") + expect_error(mcmc_rank_ecdf(arr1chain), "requires multiple chains") +}) + # options ----------------------------------------------------------------- test_that("mcmc_trace options work", { expect_gg(g1 <- mcmc_trace(arr, regex_pars = "beta", window = c(5, 10))) @@ -47,6 +58,12 @@ test_that("mcmc_trace options work", { expect_error(mcmc_trace(arr, n_warmup = 50, iter1 = 20)) }) +test_that("mcmc_rank_ecdf options work", { + expect_error( + mcmc_rank_ecdf(dframe_multiple_chains, interpolate_adj = TRUE), + "No precomputed values" + ) +}) # displaying divergences in traceplot ------------------------------------- test_that("mcmc_trace 'np' argument works", { @@ -207,6 +224,34 @@ test_that("mcmc_trace_highlight renders correctly", { vdiffr::expect_doppelganger("mcmc_trace_highlight (alpha)", p_alpha) }) +test_that("mcmc_rank_ecdf renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + + p_base <- mcmc_rank_ecdf(vdiff_dframe_chains, pars = c("V1", "V2")) + p_one_param <- mcmc_rank_ecdf(vdiff_dframe_chains, pars = "V1") + + p_no_diff <- mcmc_rank_ecdf( + vdiff_dframe_chains, + pars = c("V1", "V2"), + plot_diff = FALSE + ) + + p_no_diff_one_param <- mcmc_rank_ecdf( + vdiff_dframe_chains, + pars = "V1", + plot_diff = FALSE + ) + + vdiffr::expect_doppelganger("mcmc_rank_ecdf (default)", p_base) + vdiffr::expect_doppelganger("mcmc_rank_ecdf (one parameter)", p_one_param) + vdiffr::expect_doppelganger("mcmc_rank_ecdf (no diff)", p_no_diff) + vdiffr::expect_doppelganger( + "mcmc_rank_ecdf (one param no diff)", + p_no_diff_one_param + ) +}) + test_that("mcmc_trace with 'np' renders correctly", { testthat::skip_on_cran() testthat::skip_if_not_installed("vdiffr") diff --git a/tests/testthat/test-ppc-distributions.R b/tests/testthat/test-ppc-distributions.R index 047e59cf..7f0be3e8 100644 --- a/tests/testthat/test-ppc-distributions.R +++ b/tests/testthat/test-ppc-distributions.R @@ -58,6 +58,11 @@ test_that("ppc_dens,pp_hist,ppc_freqpoly,ppc_boxplot return ggplot objects", { expect_gg(ppd_boxplot(yrep2, notch = FALSE)) }) +test_that("ppc_pit_ecdf, ppc_pit_ecdf_grouped returns a ggplot object", { + expect_gg(ppc_pit_ecdf(y, yrep, interpolate_adj = FALSE)) + expect_gg(ppc_pit_ecdf_grouped(y, yrep, group = group, interpolate_adj = FALSE)) +}) + test_that("ppc_freqpoly_grouped returns a ggplot object", { expect_gg(ppc_freqpoly_grouped(y, yrep[1:4, ], group)) expect_gg(ppc_freqpoly_grouped(y, yrep[1:4, ], group, @@ -286,3 +291,18 @@ test_that("ppc_violin_grouped renders correctly", { set.seed(seed = NULL) }) + +test_that("ppc_pit_ecdf, ppc_pit_ecdf_grouped renders correctly", { + testthat::skip_on_cran() + testthat::skip_if_not_installed("vdiffr") + + p_base <- ppc_pit_ecdf(y, yrep, interpolate_adj = FALSE) + g_base <- ppc_pit_ecdf_grouped(y, yrep, group = group, interpolate_adj = FALSE) + p_diff <- ppc_pit_ecdf(y, yrep, plot_diff = FALSE, interpolate_adj = FALSE) + g_diff <- ppc_pit_ecdf_grouped(y, yrep, plot_diff = FALSE, group = group, interpolate_adj = FALSE) + + vdiffr::expect_doppelganger("ppc_pit_ecdf (default)", p_base) + vdiffr::expect_doppelganger("ppc_pit_ecdf_grouped (default)", g_base) + vdiffr::expect_doppelganger("ppc_pit_ecdf (no diff)", p_diff) + vdiffr::expect_doppelganger("ppc_pit_ecdf_grouped (no diff)", g_diff) +})