From 12264f94ce4ce7b9ea7f4cfa20f2a26e20f78c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 10 Jun 2021 18:05:19 +0300 Subject: [PATCH 01/48] helper functions and start for ecdf plot functions --- R/helpers-ppc.R | 12 ++++++++++-- R/ppc-distributions.R | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 1f8ee245..1b9b8b97 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -40,9 +40,10 @@ validate_y <- function(y) { #' correct number of columns (equal to the length of `y`). #' #' @param yrep,y The user's `yrep` object and the `y` object returned by `validate_y()`. +#' @param match_ncols Is the number of columns in 'yrep' required to match the length of 'y'. #' @return Either throws an error or returns a numeric matrix. #' @noRd -validate_yrep <- function(yrep, y) { +validate_yrep <- function(yrep, y, require_ncols = TRUE) { stopifnot(is.matrix(yrep), is.numeric(yrep)) if (is.integer(yrep)) { if (nrow(yrep) == 1) { @@ -57,13 +58,20 @@ validate_yrep <- function(yrep, y) { abort("NAs not allowed in 'yrep'.") } - if (ncol(yrep) != length(y)) { + if (all(match_ncols, ncol(yrep) != length(y))) { abort("ncol(yrep) must be equal to length(y).") } unclass(unname(yrep)) } +validate_pit <- function(pit) { + stopifnot(is.matrix(pit), is.numeric(yrep)) + if (any(pit < 0) || any(pit > 1)) { + abort("'pit' values expected to lie between 0 and 1 (inclusive).") + } + unclass(unname(pit)) +} #' Validate group #' diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 0cc24c7d..4dce7fbb 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -126,6 +126,25 @@ ppc_data <- function(y, yrep, group = NULL) { data } +comparison_data <- function(y, yrep, pit) { + if (!missing(pit)) { + data <- validate_pit(pit) + } elif (missing(y)) { + if (nrow(yrep) < 2) { + abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", + "multiple rows to allow for sample comparison.", sep="")) + } + data <- u_scale(validate_yrep(yrep[1,], yrep)) + } else { + y <- validate_y(y) + yrep <- validate_yrep(yrep, y, match_ncols = FALSE) + data <- apply(yrep, 1, function(rep) { + u_scale(matrix(c(y, rep), ncol=2)[,2]) + }) + } + data + } +} #' @rdname PPC-distributions @@ -456,6 +475,19 @@ ppc_ecdf_overlay_grouped <- function( force_axes_in_facets() } +ppc_ecdf_intervals <- function( + y, + yrep, + pit, + ..., + conf_level = 0.95, + size = 0.25, + alpha = 0.7 +) { + check_ignored_arguments(...) + data <- comparison_data(y, yrep, pit) + +} #' @export From a108e01e7f1dc067556293f03783be2186d84976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:02:55 +0300 Subject: [PATCH 02/48] Added helper functions for ECDF confidence intervals. --- R/helpers-ppc.R | 137 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 1 deletion(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 1b9b8b97..2489558e 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -43,7 +43,7 @@ validate_y <- function(y) { #' @param match_ncols Is the number of columns in 'yrep' required to match the length of 'y'. #' @return Either throws an error or returns a numeric matrix. #' @noRd -validate_yrep <- function(yrep, y, require_ncols = TRUE) { +validate_yrep <- function(yrep, y, match_ncols = TRUE) { stopifnot(is.matrix(yrep), is.numeric(yrep)) if (is.integer(yrep)) { if (nrow(yrep) == 1) { @@ -65,11 +65,25 @@ validate_yrep <- function(yrep, y, require_ncols = TRUE) { unclass(unname(yrep)) } + +#' Validate PIT +#' +#' Checks that the probability integral transformation (PIT) values from +#' a numeric matrix with no NAs and that the provided values fall in [0,1]. +#' +#' @param pit The 'pit' object provided by the user. +#' @returns Either throws an error or returns a numeric matrix. +#' @noRd validate_pit <- function(pit) { stopifnot(is.matrix(pit), is.numeric(yrep)) if (any(pit < 0) || any(pit > 1)) { abort("'pit' values expected to lie between 0 and 1 (inclusive).") } + + if (anyNA(pit)) { + abort("NAs not allowed in 'pit'.") + } + unclass(unname(pit)) } @@ -258,6 +272,127 @@ all_counts <- function(x, ...) { all_whole_number(x, ...) && min(x) >= 0 } +adjust_gamma <- function(N, L, K=N, conf_level=0.95) { + if (any(c(K, N, L) < 1)) { + abort("Parameters 'N', 'L' and 'K' must be positive integers.") + } + if (conf_level >= 1 || conf_level <= 0) { + abort("Value of 'conf_level' must be in (0,1).") + } + if (L==1) { + gamma <- adjust_gamma_optimize(N, K, conf_level) + } + else { + gamma <- adjust_gamma_simulate(N, L, K, conf_level) + } + gamma +} + +adjust_gamma_optimize <- function(N, K, conf_level=0.95) { + target <- function(gamma, conf_level, N, K) { + z <- 1:(K - 1) / K + z1 <- c(0,z) + z2 <- c(z,1) + + # pre-compute quantiles and use symmetry for increased efficiency. + x2_lower <- qbinom(gamma / 2, N, z2) + x2_upper <- c(N - rev(x2_lower)[2:K], 1) + + # Compute the total probability of trajectories inside the confidence + # intervals. Initialize the set and corresponding probasbilities known + # to be 0 and 1 for the starting value z1 = 0. + x1 <- 0 + p_int <- 1 + for (i in seq_along(z1)) { + tmp <- p_interior( + p_int, x1 = x1, x2 = x2_lower[i]: x2_upper[i], + z1 = z1[i], z2 = z2[i], gamma = gamma, N = N + ) + x1 <- tmp$x1 + p_int <- tmp$p_int + } + abs(conf_level - sum(p_int)) + } + optimize(target, c(0, 1 - conf_level), conf_level, N = N, K = K)$minimum +} + +adjust_gamma_simulate <-function(N, L, K, conf_level=0.95, M=5000) { + gamma <- numeric(M) + z <- (1:(K - 1)) / K + if (L > 1){ + n <- N * (L - 1) + k <- floor(z * N * L) + for (m in seq_len(M)) { + u = u_scale(replicate(L, runif(N))) + scaled_ecdfs <- apply(outer(u, z, "<="), c(2,3), sum) + gamma[m] <- 2 * min( + apply( + scaled_ecdfs, 1, phyper, m = N, n = n, k = k + ), + apply( + scaled_ecdfs - 1, 1, phyper, m = N, n = n, k = k, lower.tail = FALSE + ) + ) + } + + } + else { + for (m in seq_len(M)) { + u <- runif(N) + scaled_ecdf <- colSums(outer(u, z, "<=")) + gamma[m] <- 2 * min( + pbinom(scaled_ecdf, N, z), + pbinom(scaled_ecdfs - 1, N, z, lower.tail = FALSE) + ) + } + } + alpha_quantile(gamma, 1 - conf_level) +} + +p_interior <- function(p_int, x1, x2, z1, z2, gamma, N) { + z_tilde <- (z2 - z1) / (1 - z1) + + N_tilde <- rep(N - x1, each = length(x2)) + p_int <- rep(p_int, each = length(x2)) + x_diff <- outer(x2, x1, "-") + p_x2_int <- p_int * dbinom(x_diff, N_tilde, z_tilde) + + list(p_int = rowSums(p_x2_int), x1 = x2) +} + +# alpha percent of the trials are allowed to be rejected +alpha_quantile <- function(gamma, alpha, tol = 0.001) { + a <- unname(quantile(gamma, probs = alpha)) + a_tol <- unname(quantile(gamma, probs = alpha + tol)) + if (a == a_tol) { + if (min(gamma) < a) { + # take the largest value that doesn't exceed the tolerance. + a <- max(gamma[gamma < a]) + } + } + a +} + +ecdf_intervals <- function(N, L, K, gamma) { + lims <- list() + z <- seq(0,1, length.out = K + 1) + if (L == 1) { + lims$lower <- qbinom(gamma / 2, N, z) + lims$upper <- qbinom(1 - gamma / 2, N, z) + } else { + n <- N * (L - 1) + k <- floor(z * L * N) + lims$lower <- qhyper(gamma / 2, N, n, k) + lims$upper <- qhyper(1 - gamma / 2, N, n, k) + } + lims +} + +# Transform observations in 'x' into their corresponding fractional ranks. +u_scale <- function(x) { + array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x)) +} + # labels ---------------------------------------------------------------- create_yrep_ids <- function(ids) paste('italic(y)[rep] (', ids, ")") yrep_label <- function() expression(italic(y)[rep]) From cfc5c8be906f717b8244378d3b563642712d1e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:05:08 +0300 Subject: [PATCH 03/48] ecdf confidence intervals and sample comparison data preparation. --- R/ppc-distributions.R | 101 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 13 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 4dce7fbb..1344d99a 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -126,24 +126,44 @@ ppc_data <- function(y, yrep, group = NULL) { data } -comparison_data <- function(y, yrep, pit) { + +#' @rdname PPC-distributions +#' @export +comparison_data <- function(y, yrep, pit, K) { + if (missing(K)){ + if (!missing(y)) { + K <- length(y) + } + else if (!missing(pit)) { + K = ncol(pit) + } + else { + K = ncol(yrep) + } + } + z <- seq(0,1, length.out = K + 1) if (!missing(pit)) { - data <- validate_pit(pit) - } elif (missing(y)) { + pit <- validate_pit(pit) + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) + } else if (missing(y)) { + pit <- u_scale(validate_yrep(yrep, yrep[1, ], match_ncols = FALSE)) if (nrow(yrep) < 2) { abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", "multiple rows to allow for sample comparison.", sep="")) } - data <- u_scale(validate_yrep(yrep[1,], yrep)) + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) } else { y <- validate_y(y) yrep <- validate_yrep(yrep, y, match_ncols = FALSE) - data <- apply(yrep, 1, function(rep) { - u_scale(matrix(c(y, rep), ncol=2)[,2]) - }) - } - data + pit <- u_scale(rbind(y,yrep)) + ecdfs <- t(apply(ecdfs, 1, function(row) ecdf(row)(z))) + ecdfs <- melt_and_stack(ecdfs[1, ], ecdfs[2, nrow(yrep) + 1]) } + ecdfs } @@ -179,7 +199,6 @@ ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, #' @export #' @param notch A logical scalar passed to [ggplot2::geom_boxplot()]. #' Unlike for `geom_boxplot()`, the default is `notch=TRUE`. -#' ppc_boxplot <- function(y, yrep, ..., notch = TRUE, size = 0.5, alpha = 1) { check_ignored_arguments(...) data <- ppc_data(y, yrep) @@ -236,7 +255,7 @@ ppc_freqpoly <- function(y, yrep, ..., #' @rdname PPC-distributions #' @export #' @template args-group -#' + ppc_freqpoly_grouped <- function(y, yrep, group, ..., binwidth = NULL, freq = TRUE, size = 0.25, alpha = 1) { check_ignored_arguments(...) @@ -475,18 +494,74 @@ ppc_ecdf_overlay_grouped <- function( force_axes_in_facets() } +#' @export +#' @rdname PPC-distributions +#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', the PIT +#' values of one or more samples can be provided directly making 'y' and +#' yrep' optional. +#' @param conf_level For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', +#' the desired simultaneous confidence level of the bands to be drawn. +#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', number of +#' equally spaced evaluation points for the ECDF plot. Affects the granularity +#' of the plot and can significantly speed up the computation of the confidence +#' bands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' if PIT values are +#' provided instead. ppc_ecdf_intervals <- function( y, yrep, pit, + gamma, + K, ..., conf_level = 0.95, size = 0.25, alpha = 0.7 ) { check_ignored_arguments(...) - data <- comparison_data(y, yrep, pit) - + data <- comparison_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) + } + if (missing(gamma)) { + gamma <- adjust_gamma( + N = max(data$y_id), + L = max(data$rep_id) + any(data$is_y), + K = K, + conf_level = conf_level + ) + } + limits <- ecdf_intervals(gamma, K, L) + z <- 0:K / K + fig <- ggplot(data, mapping = aes_(x = rep(L, z))) + + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_(group = ~ variable, y = ~ value, color = ~ variable) + ) + + geom_line( + data = data.frame(x = c(0, 1), y = c(0, 1)), + mapping = aes_(x = ~ x, y = ~ y), + alpha = 0.5 * alpha, + color = 'gray' + ) + + geom_step(aes_(y = limits$upper)) + + geom_step(aes_(y = limits$lower)) + if any(data$is_y) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(y = ~ value) + ) + } + fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() + + + + + + } From 0ec2bc096c485ac365627c3c78d226b2eec9d9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:23:01 +0300 Subject: [PATCH 04/48] typo --- R/ppc-distributions.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 1344d99a..909d7901 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -545,7 +545,7 @@ ppc_ecdf_intervals <- function( ) + geom_step(aes_(y = limits$upper)) + geom_step(aes_(y = limits$lower)) - if any(data$is_y) { + if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), aes_(y = ~ value) @@ -556,12 +556,6 @@ ppc_ecdf_intervals <- function( xaxis_title(FALSE) + yaxis_ticks(FALSE) + bayesplot_theme_get() - - - - - - } From b11e0f9f2e80afa0bcc4c43b42f5e2ec528684d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:31:55 +0300 Subject: [PATCH 05/48] updated NAMESPACE and Rd --- NAMESPACE | 2 + man/PPC-distributions.Rd | 148 +++++++++++++++------------------------ 2 files changed, 57 insertions(+), 93 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 824ae29f..845022a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,6 +32,7 @@ export(bayesplot_theme_update) export(color_scheme_get) export(color_scheme_set) export(color_scheme_view) +export(comparison_data) export(example_group_data) export(example_mcmc_draws) export(example_x_data) @@ -105,6 +106,7 @@ export(ppc_data) export(ppc_dens) export(ppc_dens_overlay) export(ppc_dens_overlay_grouped) +export(ppc_ecdf_intervals) export(ppc_ecdf_overlay) export(ppc_ecdf_overlay_grouped) export(ppc_error_binned) diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index 2961468a..dc5ddcca 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -3,6 +3,7 @@ \name{PPC-distributions} \alias{PPC-distributions} \alias{ppc_data} +\alias{comparison_data} \alias{ppc_hist} \alias{ppc_boxplot} \alias{ppc_freqpoly} @@ -12,99 +13,46 @@ \alias{ppc_dens_overlay_grouped} \alias{ppc_ecdf_overlay} \alias{ppc_ecdf_overlay_grouped} +\alias{ppc_ecdf_intervals} \alias{ppc_violin_grouped} \title{PPC distributions} \usage{ ppc_data(y, yrep, group = NULL) +comparison_data(y, yrep, pit, K) + ppc_hist(y, yrep, ..., binwidth = NULL, breaks = NULL, freq = TRUE) ppc_boxplot(y, yrep, ..., notch = TRUE, size = 0.5, alpha = 1) -ppc_freqpoly( - y, - yrep, - ..., - binwidth = NULL, - freq = TRUE, - size = 0.25, - alpha = 1 -) - -ppc_freqpoly_grouped( - y, - yrep, - group, - ..., - binwidth = NULL, - freq = TRUE, - size = 0.25, - alpha = 1 -) +ppc_freqpoly(y, yrep, ..., binwidth = NULL, freq = TRUE, size = 0.25, + alpha = 1) + +ppc_freqpoly_grouped(y, yrep, group, ..., binwidth = NULL, freq = TRUE, + size = 0.25, alpha = 1) ppc_dens(y, yrep, ..., trim = FALSE, size = 0.5, alpha = 1) -ppc_dens_overlay( - y, - yrep, - ..., - size = 0.25, - alpha = 0.7, - trim = FALSE, - bw = "nrd0", - adjust = 1, - kernel = "gaussian", - n_dens = 1024 -) - -ppc_dens_overlay_grouped( - y, - yrep, - group, - ..., - size = 0.25, - alpha = 0.7, - trim = FALSE, - bw = "nrd0", - adjust = 1, - kernel = "gaussian", - n_dens = 1024 -) - -ppc_ecdf_overlay( - y, - yrep, - ..., - discrete = FALSE, - pad = TRUE, - size = 0.25, - alpha = 0.7 -) - -ppc_ecdf_overlay_grouped( - y, - yrep, - group, - ..., - discrete = FALSE, - pad = TRUE, - size = 0.25, - alpha = 0.7 -) - -ppc_violin_grouped( - y, - yrep, - group, - ..., - probs = c(0.1, 0.5, 0.9), - size = 1, - alpha = 1, - y_draw = c("violin", "points", "both"), - y_size = 1, - y_alpha = 1, - y_jitter = 0.1 -) +ppc_dens_overlay(y, yrep, ..., size = 0.25, alpha = 0.7, + trim = FALSE, bw = "nrd0", adjust = 1, kernel = "gaussian", + n_dens = 1024) + +ppc_dens_overlay_grouped(y, yrep, group, ..., size = 0.25, alpha = 0.7, + trim = FALSE, bw = "nrd0", adjust = 1, kernel = "gaussian", + n_dens = 1024) + +ppc_ecdf_overlay(y, yrep, ..., discrete = FALSE, pad = TRUE, + size = 0.25, alpha = 0.7) + +ppc_ecdf_overlay_grouped(y, yrep, group, ..., discrete = FALSE, + pad = TRUE, size = 0.25, alpha = 0.7) + +ppc_ecdf_intervals(y, yrep, pit, gamma, K, ..., conf_level = 0.95, + size = 0.25, alpha = 0.7) + +ppc_violin_grouped(y, yrep, group, ..., probs = c(0.1, 0.5, 0.9), + size = 1, alpha = 1, y_draw = c("violin", "points", "both"), + y_size = 1, y_alpha = 1, y_jitter = 0.1) } \arguments{ \item{y}{A vector of observations. See \strong{Details}.} @@ -120,6 +68,16 @@ sense. See \strong{Details} for additional instructions.} \code{y}. Each value in \code{group} is interpreted as the group level pertaining to the corresponding value of \code{y}.} +\item{pit}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', the PIT +values of one or more samples can be provided directly making 'y' and +yrep' optional.} + +\item{K, }{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', number of +equally spaced evaluation points for the ECDF plot. Affects the granularity +of the plot and can significantly speed up the computation of the confidence +bands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' if PIT values are +provided instead.} + \item{...}{Currently unused.} \item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override @@ -153,6 +111,9 @@ passed to \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}. If \code{discre \item{pad}{A logical scalar passed to \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}.} +\item{conf_level}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', +the desired simultaneous confidence level of the bands to be drawn.} + \item{probs}{A numeric vector passed to \code{\link[ggplot2:geom_violin]{ggplot2::geom_violin()}}'s \code{draw_quantiles} argument to specify at which quantiles to draw horizontal lines. Set to \code{NULL} to remove the lines.} @@ -165,11 +126,15 @@ horizontal lines. Set to \code{NULL} to remove the lines.} to to the \code{size}, \code{alpha}, and \code{width} arguments of \code{\link[ggplot2:geom_jitter]{ggplot2::geom_jitter()}} to control the appearance of \code{y} points. The default of \code{y_jitter=NULL} will let \strong{ggplot2} determine the amount of jitter.} + +\item{bw, adjust, kernel, n_dens}{Optional arguments passed to +\code{\link[stats:density]{stats::density()}} to override default kernel density estimation +parameters. \code{n_dens} defaults to \code{1024}.} } \value{ The plotting functions return a ggplot object that can be further customized using the \strong{ggplot2} package. The functions with suffix -\verb{_data()} return the data that would have been drawn by the plotting +\code{_data()} return the data that would have been drawn by the plotting function. } \description{ @@ -186,7 +151,7 @@ For Binomial data, the plots will typically be most useful if \section{Plot Descriptions}{ \describe{ -\item{\verb{ppc_hist(), ppc_freqpoly(), ppc_dens(), ppc_boxplot()}}{ +\item{\code{ppc_hist(), ppc_freqpoly(), ppc_dens(), ppc_boxplot()}}{ A separate histogram, shaded frequency polygon, smoothed kernel density estimate, or box and whiskers plot is displayed for \code{y} and each dataset (row) in \code{yrep}. For these plots \code{yrep} should therefore @@ -198,7 +163,7 @@ variable for \code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep} should therefore contain only a small number of rows. See the \strong{Examples} section. } -\item{\verb{ppc_ecdf_overlay(), ppc_dens_overlay(), ppc_ecdf_overlay_grouped(), ppc_dens_overlay_grouped()}}{ +\item{\code{ppc_ecdf_overlay(), ppc_dens_overlay(), ppc_ecdf_overlay_grouped(), ppc_dens_overlay_grouped()}}{ Kernel density or empirical CDF estimates of each dataset (row) in \code{yrep} are overlaid, with the distribution of \code{y} itself on top (and in a darker shade). When using \code{ppc_ecdf_overlay()} with discrete @@ -279,14 +244,11 @@ A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. (Ch. 6) } \seealso{ -Other PPCs: -\code{\link{PPC-censoring}}, -\code{\link{PPC-discrete}}, -\code{\link{PPC-errors}}, -\code{\link{PPC-intervals}}, -\code{\link{PPC-loo}}, -\code{\link{PPC-overview}}, -\code{\link{PPC-scatterplots}}, -\code{\link{PPC-test-statistics}} +Other PPCs: \code{\link{PPC-censoring}}, + \code{\link{PPC-discrete}}, \code{\link{PPC-errors}}, + \code{\link{PPC-intervals}}, \code{\link{PPC-loo}}, + \code{\link{PPC-overview}}, + \code{\link{PPC-scatterplots}}, + \code{\link{PPC-test-statistics}} } \concept{PPCs} From 60af0ef291241377f170abc784fb9e1845bce566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:37:34 +0300 Subject: [PATCH 06/48] prelim bug fixes --- R/ppc-distributions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 909d7901..31b874e3 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -160,7 +160,7 @@ comparison_data <- function(y, yrep, pit, K) { y <- validate_y(y) yrep <- validate_yrep(yrep, y, match_ncols = FALSE) pit <- u_scale(rbind(y,yrep)) - ecdfs <- t(apply(ecdfs, 1, function(row) ecdf(row)(z))) + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) ecdfs <- melt_and_stack(ecdfs[1, ], ecdfs[2, nrow(yrep) + 1]) } ecdfs @@ -530,7 +530,7 @@ ppc_ecdf_intervals <- function( conf_level = conf_level ) } - limits <- ecdf_intervals(gamma, K, L) + limits <- ecdf_intervals(gamma, K, max(data$rep_id) + any(data$is_y)) z <- 0:K / K fig <- ggplot(data, mapping = aes_(x = rep(L, z))) + geom_step( From 20daa1245c389bbf0fa7a4c6fa6c44ce49c87d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:46:45 +0300 Subject: [PATCH 07/48] prelim bug fixes --- R/ppc-distributions.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 31b874e3..4105d5c3 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -159,9 +159,9 @@ comparison_data <- function(y, yrep, pit, K) { } else { y <- validate_y(y) yrep <- validate_yrep(yrep, y, match_ncols = FALSE) - pit <- u_scale(rbind(y,yrep)) + pit <- u_scale(rbind(y, yrep)) ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - ecdfs <- melt_and_stack(ecdfs[1, ], ecdfs[2, nrow(yrep) + 1]) + ecdfs <- melt_and_stack(ecdfs[1, ], ecdfs[2:nrow(yrep) + 1,]) } ecdfs } @@ -530,7 +530,11 @@ ppc_ecdf_intervals <- function( conf_level = conf_level ) } - limits <- ecdf_intervals(gamma, K, max(data$rep_id) + any(data$is_y)) + limits <- ecdf_intervals( + N = max(data$y_id), + L = max(data$rep_id) + any(data$is_y), + K = K, + gamma = gamma) z <- 0:K / K fig <- ggplot(data, mapping = aes_(x = rep(L, z))) + geom_step( From 3c77d519838eed646828dd5b26a85c71541eefde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 16 Jun 2021 16:54:41 +0300 Subject: [PATCH 08/48] prelim bug fixes --- R/ppc-distributions.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 4105d5c3..c73c86fc 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -522,17 +522,19 @@ ppc_ecdf_intervals <- function( if (missing(K)) { K <- max(data$y_id) } + N <- max(data$y_id) + L <- max(data$rep_id) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( - N = max(data$y_id), - L = max(data$rep_id) + any(data$is_y), + N = N, + L = L, K = K, conf_level = conf_level ) } limits <- ecdf_intervals( - N = max(data$y_id), - L = max(data$rep_id) + any(data$is_y), + N = N, + L = L, K = K, gamma = gamma) z <- 0:K / K From 60f3f38d724132939259b53ebb0a547de2461fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 13:35:10 +0300 Subject: [PATCH 09/48] change of default functionality to merge yrep for empirical pit computation. --- R/helpers-ppc.R | 6 ++++++ R/ppc-distributions.R | 18 +++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 2489558e..98d4bcaa 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -273,6 +273,7 @@ all_counts <- function(x, ...) { } adjust_gamma <- function(N, L, K=N, conf_level=0.95) { + print(paste(K, N, L, sep=" ")) if (any(c(K, N, L) < 1)) { abort("Parameters 'N', 'L' and 'K' must be positive integers.") } @@ -393,6 +394,11 @@ u_scale <- function(x) { array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x)) } + +empirical_pit <- function(y, yrep) { + apply(outer(yrep, y, "<="), 3, sum) / length(yrep) +} + # labels ---------------------------------------------------------------- create_yrep_ids <- function(ids) paste('italic(y)[rep] (', ids, ")") yrep_label <- function() expression(italic(y)[rep]) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index c73c86fc..48ce5a49 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -135,16 +135,17 @@ comparison_data <- function(y, yrep, pit, K) { K <- length(y) } else if (!missing(pit)) { - K = ncol(pit) + K <- ncol(pit) } else { - K = ncol(yrep) + K <- ncol(yrep) } } z <- seq(0,1, length.out = K + 1) if (!missing(pit)) { pit <- validate_pit(pit) ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) } else if (missing(y)) { @@ -154,14 +155,17 @@ comparison_data <- function(y, yrep, pit, K) { "multiple rows to allow for sample comparison.", sep="")) } ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) } else { y <- validate_y(y) yrep <- validate_yrep(yrep, y, match_ncols = FALSE) - pit <- u_scale(rbind(y, yrep)) - ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - ecdfs <- melt_and_stack(ecdfs[1, ], ecdfs[2:nrow(yrep) + 1,]) + pit <- empirical_pit(y, yrep) + ecdfs <- ecdf(pit)(z) + # work around to adhere to the melt_and_stack-format with only "y". + ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) + ecdfs <- dplyr::filter(ecdfs, ecdf$is_y) } ecdfs } @@ -523,7 +527,7 @@ ppc_ecdf_intervals <- function( K <- max(data$y_id) } N <- max(data$y_id) - L <- max(data$rep_id) + any(data$is_y) + L <- max(replace(molten[molten$is_y, ]$rep_id, 'NA', 0)) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( N = N, @@ -538,7 +542,7 @@ ppc_ecdf_intervals <- function( K = K, gamma = gamma) z <- 0:K / K - fig <- ggplot(data, mapping = aes_(x = rep(L, z))) + + fig <- ggplot(data, mapping = aes_(x = rep(z, L))) + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), aes_(group = ~ variable, y = ~ value, color = ~ variable) From 7e2546853810e8f378e38e2d98a538231652a2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 13:49:50 +0300 Subject: [PATCH 10/48] typo --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 48ce5a49..f88e13ef 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -165,7 +165,7 @@ comparison_data <- function(y, yrep, pit, K) { ecdfs <- ecdf(pit)(z) # work around to adhere to the melt_and_stack-format with only "y". ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) - ecdfs <- dplyr::filter(ecdfs, ecdf$is_y) + ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) } ecdfs } From e545f40e17a7c9aac0218082732749e8d5b55ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:16:50 +0300 Subject: [PATCH 11/48] typo --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index f88e13ef..40150eba 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -163,7 +163,7 @@ comparison_data <- function(y, yrep, pit, K) { yrep <- validate_yrep(yrep, y, match_ncols = FALSE) pit <- empirical_pit(y, yrep) ecdfs <- ecdf(pit)(z) - # work around to adhere to the melt_and_stack-format with only "y". + # work around to adhere to the melt_and_stack-format with only "y" . ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) } From 67dbd86635114cf69fd7854f89802bd08b0f25a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:19:54 +0300 Subject: [PATCH 12/48] typo --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 40150eba..cdb7ccd0 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -527,7 +527,7 @@ ppc_ecdf_intervals <- function( K <- max(data$y_id) } N <- max(data$y_id) - L <- max(replace(molten[molten$is_y, ]$rep_id, 'NA', 0)) + any(data$is_y) + L <- max(replace(data[data$is_y, ]$rep_id, 'NA', 0)) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( N = N, From 911d73a61072eff9a6e323c65ca692f3a99b63ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:25:04 +0300 Subject: [PATCH 13/48] typo --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index cdb7ccd0..1690d7e9 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -527,7 +527,7 @@ ppc_ecdf_intervals <- function( K <- max(data$y_id) } N <- max(data$y_id) - L <- max(replace(data[data$is_y, ]$rep_id, 'NA', 0)) + any(data$is_y) + L <- max(replace(data$rep_id, NA, 0)) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( N = N, From 9205ac44e2cbfd00399c49f79772bba126f4d400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:39:51 +0300 Subject: [PATCH 14/48] replace NA in rep_id. --- R/ppc-distributions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 1690d7e9..efaec4aa 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -167,6 +167,7 @@ comparison_data <- function(y, yrep, pit, K) { ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) } + ecdfs$rep_id[is.na(ecdfs$rep_id)] <- 0 ecdfs } @@ -527,7 +528,7 @@ ppc_ecdf_intervals <- function( K <- max(data$y_id) } N <- max(data$y_id) - L <- max(replace(data$rep_id, NA, 0)) + any(data$is_y) + L <- max(data$rep_id) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( N = N, @@ -545,7 +546,7 @@ ppc_ecdf_intervals <- function( fig <- ggplot(data, mapping = aes_(x = rep(z, L))) + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), - aes_(group = ~ variable, y = ~ value, color = ~ variable) + aes_(group = ~ rep_id, y = ~ value, color = ~ rep_id) ) + geom_line( data = data.frame(x = c(0, 1), y = c(0, 1)), From 943274da3cc08799dadb926036a3bc99f8ffbb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:46:06 +0300 Subject: [PATCH 15/48] only attempt to draw 'yrep' if 'y' not submitted. --- R/ppc-distributions.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index efaec4aa..f9939c8b 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -544,10 +544,6 @@ ppc_ecdf_intervals <- function( gamma = gamma) z <- 0:K / K fig <- ggplot(data, mapping = aes_(x = rep(z, L))) + - geom_step( - data = function(x) dplyr::filter(x, !.data$is_y), - aes_(group = ~ rep_id, y = ~ value, color = ~ rep_id) - ) + geom_line( data = data.frame(x = c(0, 1), y = c(0, 1)), mapping = aes_(x = ~ x, y = ~ y), @@ -562,6 +558,12 @@ ppc_ecdf_intervals <- function( aes_(y = ~ value) ) } + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_(group = ~ rep_id, y = ~ value, color = ~ rep_id) + ) + } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + yaxis_title(FALSE) + xaxis_title(FALSE) + From fa92c72a7809f9134137fa0c344999ae5842e02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 14:54:12 +0300 Subject: [PATCH 16/48] Correctly compute N from the given values. --- R/ppc-distributions.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index f9939c8b..470a5897 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -525,9 +525,15 @@ ppc_ecdf_intervals <- function( check_ignored_arguments(...) data <- comparison_data(y, yrep, pit, K) if (missing(K)) { - K <- max(data$y_id) + K <- max(data$y_id) - 1 + } + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) } - N <- max(data$y_id) L <- max(data$rep_id) + any(data$is_y) if (missing(gamma)) { gamma <- adjust_gamma( From ae1cf0b707292f6959222cf638905ecec609c24a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 15:06:46 +0300 Subject: [PATCH 17/48] Fix the scaling of the confidence intervals. --- R/ppc-distributions.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 470a5897..eedac35f 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -549,25 +549,25 @@ ppc_ecdf_intervals <- function( K = K, gamma = gamma) z <- 0:K / K - fig <- ggplot(data, mapping = aes_(x = rep(z, L))) + + fig <- ggplot() + geom_line( data = data.frame(x = c(0, 1), y = c(0, 1)), mapping = aes_(x = ~ x, y = ~ y), alpha = 0.5 * alpha, color = 'gray' ) + - geom_step(aes_(y = limits$upper)) + - geom_step(aes_(y = limits$lower)) + geom_step(aes_(x = z, y = limits$upper / N), color = 'gray') + + geom_step(aes_(x = z, y = limits$lower / N), color = 'gray') if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(y = ~ value) + aes_(x = z, y = ~ value) ) } if (any(!data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), - aes_(group = ~ rep_id, y = ~ value, color = ~ rep_id) + aes_(x = rep(z, L - any(data$is_y)), group = ~ rep_id, y = ~ value, color = ~ rep_id) ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + From f34c2d47ec8da50ada07a1d0efe0942ad0bdb8ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 15:10:44 +0300 Subject: [PATCH 18/48] Data missing frpm ggplot --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index eedac35f..90b83d78 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -549,7 +549,7 @@ ppc_ecdf_intervals <- function( K = K, gamma = gamma) z <- 0:K / K - fig <- ggplot() + + fig <- ggplot(data) + geom_line( data = data.frame(x = c(0, 1), y = c(0, 1)), mapping = aes_(x = ~ x, y = ~ y), From 70511d06d706748ffbad2a2aee39ba6a937fcb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 15:27:28 +0300 Subject: [PATCH 19/48] correcting ggplot usage. --- R/ppc-distributions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 90b83d78..0f5ad36d 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -556,8 +556,8 @@ ppc_ecdf_intervals <- function( alpha = 0.5 * alpha, color = 'gray' ) + - geom_step(aes_(x = z, y = limits$upper / N), color = 'gray') + - geom_step(aes_(x = z, y = limits$lower / N), color = 'gray') + geom_step(data = limits, aes_(x = z, y = ~ upper / N), color = 'gray') + + geom_step(data = limits, aes_(x = z, y = ~ lower / N), color = 'gray') if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), From de47a2157e939a09d4e5f59abcd4e5441e56c4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 15:27:56 +0300 Subject: [PATCH 20/48] remove debug printting. --- R/helpers-ppc.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 98d4bcaa..9edeef6c 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -273,7 +273,6 @@ all_counts <- function(x, ...) { } adjust_gamma <- function(N, L, K=N, conf_level=0.95) { - print(paste(K, N, L, sep=" ")) if (any(c(K, N, L) < 1)) { abort("Parameters 'N', 'L' and 'K' must be positive integers.") } From 2c53f31e115f79fff6f0ecbf4d21f6ed36de3022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 15:31:16 +0300 Subject: [PATCH 21/48] more ggplot corrections --- R/ppc-distributions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 0f5ad36d..41a93886 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -556,8 +556,8 @@ ppc_ecdf_intervals <- function( alpha = 0.5 * alpha, color = 'gray' ) + - geom_step(data = limits, aes_(x = z, y = ~ upper / N), color = 'gray') + - geom_step(data = limits, aes_(x = z, y = ~ lower / N), color = 'gray') + geom_step(data = data.frame(limits), aes_(x = z, y = ~ upper / N), color = 'gray') + + geom_step(data = data.frame(limits), aes_(x = z, y = ~ lower / N), color = 'gray') if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), From 08467bb758f73a2ff06f7bfda26d2dc5aab30416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 16:12:46 +0300 Subject: [PATCH 22/48] fix x axis replication with sample comparison. --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 41a93886..7105c6a5 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -567,7 +567,7 @@ ppc_ecdf_intervals <- function( if (any(!data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, L - any(data$is_y)), group = ~ rep_id, y = ~ value, color = ~ rep_id) + aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, y = ~ value, color = ~ rep_id) ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + From 9d39db3dec05bc0d6a4f951f2c0ace98510aa2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 16:18:19 +0300 Subject: [PATCH 23/48] remove diagonal line from ecdf plot. --- R/ppc-distributions.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 7105c6a5..e00b1cb6 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -550,12 +550,6 @@ ppc_ecdf_intervals <- function( gamma = gamma) z <- 0:K / K fig <- ggplot(data) + - geom_line( - data = data.frame(x = c(0, 1), y = c(0, 1)), - mapping = aes_(x = ~ x, y = ~ y), - alpha = 0.5 * alpha, - color = 'gray' - ) + geom_step(data = data.frame(limits), aes_(x = z, y = ~ upper / N), color = 'gray') + geom_step(data = data.frame(limits), aes_(x = z, y = ~ lower / N), color = 'gray') if (any(data$is_y)) { @@ -571,6 +565,7 @@ ppc_ecdf_intervals <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + scale_color_ppc_dist() + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + From c40362b8925d826a97bc98e59da8a449852eeb98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 16:31:32 +0300 Subject: [PATCH 24/48] Change rep_id to factors for color scaling. --- R/ppc-distributions.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index e00b1cb6..1889176f 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -561,7 +561,8 @@ ppc_ecdf_intervals <- function( if (any(!data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, y = ~ value, color = ~ rep_id) + aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, + y = ~ value, color = ~ as.factor(rep_id)) ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + From d4f7517b0eaf22a676e7e96b641f3058a30385a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 16:34:34 +0300 Subject: [PATCH 25/48] color scaling fixes. --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 1889176f..b3c6dab1 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -566,7 +566,7 @@ ppc_ecdf_intervals <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_ppc_dist() + + scale_color_discrete() + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + From 7e92da85743655864dd68c4199d58d7494670665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 28 Jun 2021 16:35:56 +0300 Subject: [PATCH 26/48] color by rep label --- R/ppc-distributions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index b3c6dab1..a1f4e40b 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -562,7 +562,7 @@ ppc_ecdf_intervals <- function( fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, - y = ~ value, color = ~ as.factor(rep_id)) + y = ~ value, color = ~ rep_label) ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + From c77881d853084dc363410d8852223bcde9f7410d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 14:43:12 +0300 Subject: [PATCH 27/48] Added ppc_ecdf_intervals_difference. --- NAMESPACE | 153 ------------------------------------------ R/ppc-distributions.R | 84 ++++++++++++++++++++--- 2 files changed, 75 insertions(+), 162 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 845022a6..e11f9818 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,158 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method("[",neff_ratio) -S3method("[",rhat) -S3method(log_posterior,CmdStanMCMC) -S3method(log_posterior,stanfit) -S3method(log_posterior,stanreg) -S3method(neff_ratio,CmdStanMCMC) -S3method(neff_ratio,stanfit) -S3method(neff_ratio,stanreg) -S3method(nuts_params,CmdStanMCMC) -S3method(nuts_params,list) -S3method(nuts_params,stanfit) -S3method(nuts_params,stanreg) -S3method(plot,bayesplot_grid) -S3method(plot,bayesplot_scheme) -S3method(pp_check,default) -S3method(print,bayesplot_function_list) -S3method(print,bayesplot_grid) -S3method(print,bayesplot_scheme) -S3method(rhat,CmdStanMCMC) -S3method(rhat,stanfit) -S3method(rhat,stanreg) -export(abline_01) -export(available_mcmc) -export(available_ppc) -export(bayesplot_grid) -export(bayesplot_theme_get) -export(bayesplot_theme_replace) -export(bayesplot_theme_set) -export(bayesplot_theme_update) -export(color_scheme_get) -export(color_scheme_set) -export(color_scheme_view) -export(comparison_data) -export(example_group_data) -export(example_mcmc_draws) -export(example_x_data) -export(example_y_data) -export(example_yrep_draws) -export(facet_bg) -export(facet_text) -export(grid_lines) -export(hline_0) -export(hline_at) -export(lbub) -export(legend_move) -export(legend_none) -export(legend_text) -export(log_posterior) -export(mcmc_acf) -export(mcmc_acf_bar) -export(mcmc_areas) -export(mcmc_areas_data) -export(mcmc_areas_ridges) -export(mcmc_areas_ridges_data) -export(mcmc_combo) -export(mcmc_dens) -export(mcmc_dens_chains) -export(mcmc_dens_chains_data) -export(mcmc_dens_overlay) -export(mcmc_hex) -export(mcmc_hist) -export(mcmc_hist_by_chain) -export(mcmc_intervals) -export(mcmc_intervals_data) -export(mcmc_neff) -export(mcmc_neff_data) -export(mcmc_neff_hist) -export(mcmc_nuts_acceptance) -export(mcmc_nuts_divergence) -export(mcmc_nuts_energy) -export(mcmc_nuts_stepsize) -export(mcmc_nuts_treedepth) -export(mcmc_pairs) -export(mcmc_parcoord) -export(mcmc_parcoord_data) -export(mcmc_rank_hist) -export(mcmc_rank_overlay) -export(mcmc_recover_hist) -export(mcmc_recover_intervals) -export(mcmc_recover_scatter) -export(mcmc_rhat) -export(mcmc_rhat_data) -export(mcmc_rhat_hist) -export(mcmc_scatter) -export(mcmc_trace) -export(mcmc_trace_data) -export(mcmc_trace_highlight) -export(mcmc_violin) -export(neff_ratio) -export(nuts_params) -export(overlay_function) -export(pairs_condition) -export(pairs_style_np) -export(panel_bg) -export(param_glue) -export(param_range) -export(parcoord_style_np) -export(plot_bg) -export(pp_check) -export(ppc_bars) -export(ppc_bars_grouped) -export(ppc_boxplot) -export(ppc_data) -export(ppc_dens) -export(ppc_dens_overlay) -export(ppc_dens_overlay_grouped) -export(ppc_ecdf_intervals) -export(ppc_ecdf_overlay) -export(ppc_ecdf_overlay_grouped) -export(ppc_error_binned) -export(ppc_error_hist) -export(ppc_error_hist_grouped) -export(ppc_error_scatter) -export(ppc_error_scatter_avg) -export(ppc_error_scatter_avg_vs_x) -export(ppc_freqpoly) -export(ppc_freqpoly_grouped) -export(ppc_hist) -export(ppc_intervals) -export(ppc_intervals_data) -export(ppc_intervals_grouped) -export(ppc_km_overlay) -export(ppc_loo_intervals) -export(ppc_loo_pit) -export(ppc_loo_pit_data) -export(ppc_loo_pit_overlay) -export(ppc_loo_pit_qq) -export(ppc_loo_ribbon) -export(ppc_ribbon) -export(ppc_ribbon_data) -export(ppc_ribbon_grouped) -export(ppc_rootogram) -export(ppc_scatter) -export(ppc_scatter_avg) -export(ppc_scatter_avg_grouped) -export(ppc_stat) -export(ppc_stat_2d) -export(ppc_stat_freqpoly_grouped) -export(ppc_stat_grouped) -export(ppc_violin_grouped) -export(rhat) -export(scatter_style_np) -export(theme_default) -export(trace_style_np) -export(vars) -export(vline_0) -export(vline_at) -export(xaxis_text) -export(xaxis_ticks) -export(xaxis_title) -export(yaxis_text) -export(yaxis_ticks) -export(yaxis_title) import(ggplot2) import(rlang) import(stats) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index a1f4e40b..f10ec2ea 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -501,16 +501,17 @@ ppc_ecdf_overlay_grouped <- function( #' @export #' @rdname PPC-distributions -#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', the PIT -#' values of one or more samples can be provided directly making 'y' and +#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the +#' PIT values of one or more samples can be provided directly making 'y' and #' yrep' optional. -#' @param conf_level For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', -#' the desired simultaneous confidence level of the bands to be drawn. -#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', number of -#' equally spaced evaluation points for the ECDF plot. Affects the granularity -#' of the plot and can significantly speed up the computation of the confidence -#' bands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' if PIT values are -#' provided instead. +#' @param conf_level For 'ppc_ecdf_intervals' and +#' 'ppc_ecdf_intervals_difference', the desired simultaneous confidence level +#' of the bands to be drawn. +#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +#' number of equally spaced evaluation points for the ECDF plot. Affects the +#' granularity of the plot and can significantly speed up the computation of +#' the confidencebands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' +#' if PIT values are provided instead. ppc_ecdf_intervals <- function( y, yrep, @@ -574,6 +575,71 @@ ppc_ecdf_intervals <- function( } +#' @export +#' @rdname PPC-distributions +ppc_ecdf_intervals_difference <- function( + y, + yrep, + pit, + gamma, + K, + ..., + conf_level = 0.95, + size = 0.25, + alpha = 0.7 +) { + check_ignored_arguments(...) + data <- comparison_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) - 1 + } + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) + } + L <- max(data$rep_id) + any(data$is_y) + if (missing(gamma)) { + gamma <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = conf_level + ) + } + limits <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma) + z <- 0:K / K + fig <- ggplot(data) + + geom_step(data = data.frame(limits), aes_(x = z, y = ~ upper / N - z), color = 'gray') + + geom_step(data = data.frame(limits), aes_(x = z, y = ~ lower / N - z), color = 'gray') + if (any(data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(x = z, y = ~ value - z) + ) + } + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, + y = ~ value - z, color = ~ rep_label) + ) + } + fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + scale_color_discrete() + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() +} + + #' @export #' @rdname PPC-distributions #' @param probs A numeric vector passed to [ggplot2::geom_violin()]'s From e1cd094ec43f5574410b11a03d2ecbbab727d93d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 15:03:19 +0300 Subject: [PATCH 28/48] fixing NAMESPACE --- NAMESPACE | 154 ++++++++++++++++++++++++++++++++++++++++++++++++ R/helpers-ppc.R | 2 +- 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index e11f9818..3231a0b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,159 @@ # Generated by roxygen2: do not edit by hand +S3method("[",neff_ratio) +S3method("[",rhat) +S3method(log_posterior,CmdStanMCMC) +S3method(log_posterior,stanfit) +S3method(log_posterior,stanreg) +S3method(neff_ratio,CmdStanMCMC) +S3method(neff_ratio,stanfit) +S3method(neff_ratio,stanreg) +S3method(nuts_params,CmdStanMCMC) +S3method(nuts_params,list) +S3method(nuts_params,stanfit) +S3method(nuts_params,stanreg) +S3method(plot,bayesplot_grid) +S3method(plot,bayesplot_scheme) +S3method(pp_check,default) +S3method(print,bayesplot_function_list) +S3method(print,bayesplot_grid) +S3method(print,bayesplot_scheme) +S3method(rhat,CmdStanMCMC) +S3method(rhat,stanfit) +S3method(rhat,stanreg) +export(abline_01) +export(available_mcmc) +export(available_ppc) +export(bayesplot_grid) +export(bayesplot_theme_get) +export(bayesplot_theme_replace) +export(bayesplot_theme_set) +export(bayesplot_theme_update) +export(color_scheme_get) +export(color_scheme_set) +export(color_scheme_view) +export(comparison_data) +export(example_group_data) +export(example_mcmc_draws) +export(example_x_data) +export(example_y_data) +export(example_yrep_draws) +export(facet_bg) +export(facet_text) +export(grid_lines) +export(hline_0) +export(hline_at) +export(lbub) +export(legend_move) +export(legend_none) +export(legend_text) +export(log_posterior) +export(mcmc_acf) +export(mcmc_acf_bar) +export(mcmc_areas) +export(mcmc_areas_data) +export(mcmc_areas_ridges) +export(mcmc_areas_ridges_data) +export(mcmc_combo) +export(mcmc_dens) +export(mcmc_dens_chains) +export(mcmc_dens_chains_data) +export(mcmc_dens_overlay) +export(mcmc_hex) +export(mcmc_hist) +export(mcmc_hist_by_chain) +export(mcmc_intervals) +export(mcmc_intervals_data) +export(mcmc_neff) +export(mcmc_neff_data) +export(mcmc_neff_hist) +export(mcmc_nuts_acceptance) +export(mcmc_nuts_divergence) +export(mcmc_nuts_energy) +export(mcmc_nuts_stepsize) +export(mcmc_nuts_treedepth) +export(mcmc_pairs) +export(mcmc_parcoord) +export(mcmc_parcoord_data) +export(mcmc_rank_hist) +export(mcmc_rank_overlay) +export(mcmc_recover_hist) +export(mcmc_recover_intervals) +export(mcmc_recover_scatter) +export(mcmc_rhat) +export(mcmc_rhat_data) +export(mcmc_rhat_hist) +export(mcmc_scatter) +export(mcmc_trace) +export(mcmc_trace_data) +export(mcmc_trace_highlight) +export(mcmc_violin) +export(neff_ratio) +export(nuts_params) +export(overlay_function) +export(pairs_condition) +export(pairs_style_np) +export(panel_bg) +export(param_glue) +export(param_range) +export(parcoord_style_np) +export(plot_bg) +export(pp_check) +export(ppc_bars) +export(ppc_bars_grouped) +export(ppc_boxplot) +export(ppc_data) +export(ppc_dens) +export(ppc_dens_overlay) +export(ppc_dens_overlay_grouped) +export(ppc_ecdf_intervals) +export(ppc_ecdf_intervals_difference) +export(ppc_ecdf_overlay) +export(ppc_ecdf_overlay_grouped) +export(ppc_error_binned) +export(ppc_error_hist) +export(ppc_error_hist_grouped) +export(ppc_error_scatter) +export(ppc_error_scatter_avg) +export(ppc_error_scatter_avg_vs_x) +export(ppc_freqpoly) +export(ppc_freqpoly_grouped) +export(ppc_hist) +export(ppc_intervals) +export(ppc_intervals_data) +export(ppc_intervals_grouped) +export(ppc_km_overlay) +export(ppc_loo_intervals) +export(ppc_loo_pit) +export(ppc_loo_pit_data) +export(ppc_loo_pit_overlay) +export(ppc_loo_pit_qq) +export(ppc_loo_ribbon) +export(ppc_ribbon) +export(ppc_ribbon_data) +export(ppc_ribbon_grouped) +export(ppc_rootogram) +export(ppc_scatter) +export(ppc_scatter_avg) +export(ppc_scatter_avg_grouped) +export(ppc_stat) +export(ppc_stat_2d) +export(ppc_stat_freqpoly_grouped) +export(ppc_stat_grouped) +export(ppc_violin_grouped) +export(rhat) +export(scatter_style_np) +export(theme_default) +export(trace_style_np) +export(vars) +export(vline_0) +export(vline_at) +export(xaxis_text) +export(xaxis_ticks) +export(xaxis_title) +export(yaxis_text) +export(yaxis_ticks) +export(yaxis_title) import(ggplot2) import(rlang) import(stats) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 9edeef6c..3efa277a 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -72,7 +72,7 @@ validate_yrep <- function(yrep, y, match_ncols = TRUE) { #' a numeric matrix with no NAs and that the provided values fall in [0,1]. #' #' @param pit The 'pit' object provided by the user. -#' @returns Either throws an error or returns a numeric matrix. +#' @return Either throws an error or returns a numeric matrix. #' @noRd validate_pit <- function(pit) { stopifnot(is.matrix(pit), is.numeric(yrep)) From ed832d066590669c4e5772283adb5b39ac000be1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 16:42:22 +0300 Subject: [PATCH 29/48] Moved ppc_ecdf_intervals and ppc_ecdf_intervals_difference to PPC-intervals. --- NAMESPACE | 1 - R/ppc-distributions.R | 185 ------------------------------- R/ppc-intervals.R | 233 ++++++++++++++++++++++++++++++++++++++- man/PPC-distributions.Rd | 148 ++++++++++++++++--------- man/PPC-intervals.Rd | 49 +++++++- 5 files changed, 370 insertions(+), 246 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3231a0b3..75a00c1f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,7 +32,6 @@ export(bayesplot_theme_update) export(color_scheme_get) export(color_scheme_set) export(color_scheme_view) -export(comparison_data) export(example_group_data) export(example_mcmc_draws) export(example_x_data) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index f10ec2ea..14fe9970 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -127,51 +127,6 @@ ppc_data <- function(y, yrep, group = NULL) { } -#' @rdname PPC-distributions -#' @export -comparison_data <- function(y, yrep, pit, K) { - if (missing(K)){ - if (!missing(y)) { - K <- length(y) - } - else if (!missing(pit)) { - K <- ncol(pit) - } - else { - K <- ncol(yrep) - } - } - z <- seq(0,1, length.out = K + 1) - if (!missing(pit)) { - pit <- validate_pit(pit) - ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - # work around to adhere to the melt_and_stack-format with only "yrep". - ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) - ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) - } else if (missing(y)) { - pit <- u_scale(validate_yrep(yrep, yrep[1, ], match_ncols = FALSE)) - if (nrow(yrep) < 2) { - abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", - "multiple rows to allow for sample comparison.", sep="")) - } - ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - # work around to adhere to the melt_and_stack-format with only "yrep". - ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) - ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) - } else { - y <- validate_y(y) - yrep <- validate_yrep(yrep, y, match_ncols = FALSE) - pit <- empirical_pit(y, yrep) - ecdfs <- ecdf(pit)(z) - # work around to adhere to the melt_and_stack-format with only "y" . - ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) - ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) - } - ecdfs$rep_id[is.na(ecdfs$rep_id)] <- 0 - ecdfs -} - - #' @rdname PPC-distributions #' @export ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, @@ -499,146 +454,6 @@ ppc_ecdf_overlay_grouped <- function( force_axes_in_facets() } -#' @export -#' @rdname PPC-distributions -#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the -#' PIT values of one or more samples can be provided directly making 'y' and -#' yrep' optional. -#' @param conf_level For 'ppc_ecdf_intervals' and -#' 'ppc_ecdf_intervals_difference', the desired simultaneous confidence level -#' of the bands to be drawn. -#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', -#' number of equally spaced evaluation points for the ECDF plot. Affects the -#' granularity of the plot and can significantly speed up the computation of -#' the confidencebands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' -#' if PIT values are provided instead. -ppc_ecdf_intervals <- function( - y, - yrep, - pit, - gamma, - K, - ..., - conf_level = 0.95, - size = 0.25, - alpha = 0.7 -) { - check_ignored_arguments(...) - data <- comparison_data(y, yrep, pit, K) - if (missing(K)) { - K <- max(data$y_id) - 1 - } - if (!missing(y)) { - N <- length(y) - } else if (!missing(yrep)) { - N <- ncol(yrep) - } else { - N <- ncol(pit) - } - L <- max(data$rep_id) + any(data$is_y) - if (missing(gamma)) { - gamma <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = conf_level - ) - } - limits <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma) - z <- 0:K / K - fig <- ggplot(data) + - geom_step(data = data.frame(limits), aes_(x = z, y = ~ upper / N), color = 'gray') + - geom_step(data = data.frame(limits), aes_(x = z, y = ~ lower / N), color = 'gray') - if (any(data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value) - ) - } - if (any(!data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, - y = ~ value, color = ~ rep_label) - ) - } - fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete() + - yaxis_title(FALSE) + - xaxis_title(FALSE) + - yaxis_ticks(FALSE) + - bayesplot_theme_get() -} - - -#' @export -#' @rdname PPC-distributions -ppc_ecdf_intervals_difference <- function( - y, - yrep, - pit, - gamma, - K, - ..., - conf_level = 0.95, - size = 0.25, - alpha = 0.7 -) { - check_ignored_arguments(...) - data <- comparison_data(y, yrep, pit, K) - if (missing(K)) { - K <- max(data$y_id) - 1 - } - if (!missing(y)) { - N <- length(y) - } else if (!missing(yrep)) { - N <- ncol(yrep) - } else { - N <- ncol(pit) - } - L <- max(data$rep_id) + any(data$is_y) - if (missing(gamma)) { - gamma <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = conf_level - ) - } - limits <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma) - z <- 0:K / K - fig <- ggplot(data) + - geom_step(data = data.frame(limits), aes_(x = z, y = ~ upper / N - z), color = 'gray') + - geom_step(data = data.frame(limits), aes_(x = z, y = ~ lower / N - z), color = 'gray') - if (any(data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z) - ) - } - if (any(!data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, - y = ~ value - z, color = ~ rep_label) - ) - } - fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete() + - yaxis_title(FALSE) + - xaxis_title(FALSE) + - yaxis_ticks(FALSE) + - bayesplot_theme_get() -} - #' @export #' @rdname PPC-distributions diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 5571af06..9c41c850 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -130,6 +130,195 @@ ppc_intervals <- function(y, yrep, x = NULL, ..., prob = 0.5, prob_outer = 0.9, ) } +#' @export +#' @rdname PPC-intervals +#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the +#' PIT values of one or more samples can be provided directly causing'y' and +#' 'yrep' to be ignored. +#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +#' number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects +#' the granularity of the plot and can significantly speed up the computation +#' of the simultaneous confidence bands. Defaults to 'length(y)', +#' 'K = ncol(yrep)', or lastly to 'K = ncol(pit)' depending on which one is +#' provided. +ppc_ecdf_intervals <- function( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) { + check_ignored_arguments(...) + data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) - 1 + } + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) + } + L <- max(data$rep_id) + any(data$is_y) + if (missing(gamma_outer)) { + gamma_outer <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob_outer + ) + } + limits <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma) + limits_outer <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma_outer) + z <- 0:K / K + fig <- ggplot(data) + + geom_ribbon( + data = data.frame(limits_outer), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N, + ymin = ~ lower / N + ), + alpha = alpha, + size = size) + + geom_ribbon( + data = data.frame(limits), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N, + ymin = ~ lower / N + ), + alpha = alpha, + size = size) + if (any(data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(x = z, y = ~ value), + size = size + ) + } + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, + y = ~ value, color = ~ rep_label), + size = size + ) + } + fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + scale_color_discrete() + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() +} + +#' @export +#' @rdname PPC-intervals +ppc_ecdf_intervals_difference <- function( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) { + check_ignored_arguments(...) + data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) - 1 + } + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) + } + L <- max(data$rep_id) + any(data$is_y) + if (missing(gamma_outer)) { + gamma_outer <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob_outer + ) + } + limits <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma) + limits_outer <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma_outer) + z <- 0:K / K + fig <- ggplot(data) + + geom_ribbon( + data = data.frame(limits_outer), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N - c(0, rep(z[2:(K + 1)], each = 2)), + ymin = ~ lower / N - c(0, rep(z[2:(K + 1)], each = 2)) + ), + alpha = alpha, + size = size) + + geom_ribbon( + data = data.frame(limits), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N - c(0, rep(z[2:(K + 1)], each = 2)), + ymin = ~ lower / N - c(0, rep(z[2:(K + 1)], each = 2)) + ), + alpha = alpha, + size = size) + if (any(data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(x = z, y = ~ value - z) + ) + } + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_( + x = rep(z, each = L - any(data$is_y)), + y = ~ value - rep(z, each = L - any(data$is_y)), + group = ~ rep_id, + color = ~ rep_label + ) + ) + } + fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + scale_color_discrete() + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() +} + #' @rdname PPC-intervals #' @export #' @template args-group @@ -312,6 +501,49 @@ label_x <- function(x) { )) } +.ppc_ecdf_intervals_data <- function(y, yrep, pit, K) { + if (missing(K)){ + if (!missing(y)) { + K <- length(y) + } + else if (!missing(pit)) { + K <- ncol(pit) + } + else { + K <- ncol(yrep) + } + } + z <- seq(0,1, length.out = K + 1) + if (!missing(pit)) { + pit <- validate_pit(pit) + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) + } else if (missing(y)) { + pit <- u_scale(validate_yrep(yrep, yrep[1, ], match_ncols = FALSE)) + if (nrow(yrep) < 2) { + abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", + "multiple rows to allow for sample comparison.", sep="")) + } + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) + } else { + y <- validate_y(y) + yrep <- validate_yrep(yrep, y, match_ncols = FALSE) + pit <- empirical_pit(y, yrep) + ecdfs <- ecdf(pit)(z) + # work around to adhere to the melt_and_stack-format with only "y" . + ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) + ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) + } + ecdfs$rep_id[is.na(ecdfs$rep_id)] <- 0 + ecdfs +} + + # Make intervals or ribbon plot # # @param data The object returned by .ppc_intervals_data @@ -419,4 +651,3 @@ label_x <- function(x) { labs(y = NULL, x = x_lab %||% expression(italic(x))) + bayesplot_theme_get() } - diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index dc5ddcca..2961468a 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -3,7 +3,6 @@ \name{PPC-distributions} \alias{PPC-distributions} \alias{ppc_data} -\alias{comparison_data} \alias{ppc_hist} \alias{ppc_boxplot} \alias{ppc_freqpoly} @@ -13,46 +12,99 @@ \alias{ppc_dens_overlay_grouped} \alias{ppc_ecdf_overlay} \alias{ppc_ecdf_overlay_grouped} -\alias{ppc_ecdf_intervals} \alias{ppc_violin_grouped} \title{PPC distributions} \usage{ ppc_data(y, yrep, group = NULL) -comparison_data(y, yrep, pit, K) - ppc_hist(y, yrep, ..., binwidth = NULL, breaks = NULL, freq = TRUE) ppc_boxplot(y, yrep, ..., notch = TRUE, size = 0.5, alpha = 1) -ppc_freqpoly(y, yrep, ..., binwidth = NULL, freq = TRUE, size = 0.25, - alpha = 1) - -ppc_freqpoly_grouped(y, yrep, group, ..., binwidth = NULL, freq = TRUE, - size = 0.25, alpha = 1) +ppc_freqpoly( + y, + yrep, + ..., + binwidth = NULL, + freq = TRUE, + size = 0.25, + alpha = 1 +) + +ppc_freqpoly_grouped( + y, + yrep, + group, + ..., + binwidth = NULL, + freq = TRUE, + size = 0.25, + alpha = 1 +) ppc_dens(y, yrep, ..., trim = FALSE, size = 0.5, alpha = 1) -ppc_dens_overlay(y, yrep, ..., size = 0.25, alpha = 0.7, - trim = FALSE, bw = "nrd0", adjust = 1, kernel = "gaussian", - n_dens = 1024) - -ppc_dens_overlay_grouped(y, yrep, group, ..., size = 0.25, alpha = 0.7, - trim = FALSE, bw = "nrd0", adjust = 1, kernel = "gaussian", - n_dens = 1024) - -ppc_ecdf_overlay(y, yrep, ..., discrete = FALSE, pad = TRUE, - size = 0.25, alpha = 0.7) - -ppc_ecdf_overlay_grouped(y, yrep, group, ..., discrete = FALSE, - pad = TRUE, size = 0.25, alpha = 0.7) - -ppc_ecdf_intervals(y, yrep, pit, gamma, K, ..., conf_level = 0.95, - size = 0.25, alpha = 0.7) - -ppc_violin_grouped(y, yrep, group, ..., probs = c(0.1, 0.5, 0.9), - size = 1, alpha = 1, y_draw = c("violin", "points", "both"), - y_size = 1, y_alpha = 1, y_jitter = 0.1) +ppc_dens_overlay( + y, + yrep, + ..., + size = 0.25, + alpha = 0.7, + trim = FALSE, + bw = "nrd0", + adjust = 1, + kernel = "gaussian", + n_dens = 1024 +) + +ppc_dens_overlay_grouped( + y, + yrep, + group, + ..., + size = 0.25, + alpha = 0.7, + trim = FALSE, + bw = "nrd0", + adjust = 1, + kernel = "gaussian", + n_dens = 1024 +) + +ppc_ecdf_overlay( + y, + yrep, + ..., + discrete = FALSE, + pad = TRUE, + size = 0.25, + alpha = 0.7 +) + +ppc_ecdf_overlay_grouped( + y, + yrep, + group, + ..., + discrete = FALSE, + pad = TRUE, + size = 0.25, + alpha = 0.7 +) + +ppc_violin_grouped( + y, + yrep, + group, + ..., + probs = c(0.1, 0.5, 0.9), + size = 1, + alpha = 1, + y_draw = c("violin", "points", "both"), + y_size = 1, + y_alpha = 1, + y_jitter = 0.1 +) } \arguments{ \item{y}{A vector of observations. See \strong{Details}.} @@ -68,16 +120,6 @@ sense. See \strong{Details} for additional instructions.} \code{y}. Each value in \code{group} is interpreted as the group level pertaining to the corresponding value of \code{y}.} -\item{pit}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', the PIT -values of one or more samples can be provided directly making 'y' and -yrep' optional.} - -\item{K, }{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', number of -equally spaced evaluation points for the ECDF plot. Affects the granularity -of the plot and can significantly speed up the computation of the confidence -bands. The default is 'K = ncol(yrep)', or 'K = ncol(pit)' if PIT values are -provided instead.} - \item{...}{Currently unused.} \item{binwidth}{Passed to \code{\link[ggplot2:geom_histogram]{ggplot2::geom_histogram()}} to override @@ -111,9 +153,6 @@ passed to \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}. If \code{discre \item{pad}{A logical scalar passed to \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}.} -\item{conf_level}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_diff_intervals', -the desired simultaneous confidence level of the bands to be drawn.} - \item{probs}{A numeric vector passed to \code{\link[ggplot2:geom_violin]{ggplot2::geom_violin()}}'s \code{draw_quantiles} argument to specify at which quantiles to draw horizontal lines. Set to \code{NULL} to remove the lines.} @@ -126,15 +165,11 @@ horizontal lines. Set to \code{NULL} to remove the lines.} to to the \code{size}, \code{alpha}, and \code{width} arguments of \code{\link[ggplot2:geom_jitter]{ggplot2::geom_jitter()}} to control the appearance of \code{y} points. The default of \code{y_jitter=NULL} will let \strong{ggplot2} determine the amount of jitter.} - -\item{bw, adjust, kernel, n_dens}{Optional arguments passed to -\code{\link[stats:density]{stats::density()}} to override default kernel density estimation -parameters. \code{n_dens} defaults to \code{1024}.} } \value{ The plotting functions return a ggplot object that can be further customized using the \strong{ggplot2} package. The functions with suffix -\code{_data()} return the data that would have been drawn by the plotting +\verb{_data()} return the data that would have been drawn by the plotting function. } \description{ @@ -151,7 +186,7 @@ For Binomial data, the plots will typically be most useful if \section{Plot Descriptions}{ \describe{ -\item{\code{ppc_hist(), ppc_freqpoly(), ppc_dens(), ppc_boxplot()}}{ +\item{\verb{ppc_hist(), ppc_freqpoly(), ppc_dens(), ppc_boxplot()}}{ A separate histogram, shaded frequency polygon, smoothed kernel density estimate, or box and whiskers plot is displayed for \code{y} and each dataset (row) in \code{yrep}. For these plots \code{yrep} should therefore @@ -163,7 +198,7 @@ variable for \code{y} and each dataset (row) in \code{yrep}. For this plot \code{yrep} should therefore contain only a small number of rows. See the \strong{Examples} section. } -\item{\code{ppc_ecdf_overlay(), ppc_dens_overlay(), ppc_ecdf_overlay_grouped(), ppc_dens_overlay_grouped()}}{ +\item{\verb{ppc_ecdf_overlay(), ppc_dens_overlay(), ppc_ecdf_overlay_grouped(), ppc_dens_overlay_grouped()}}{ Kernel density or empirical CDF estimates of each dataset (row) in \code{yrep} are overlaid, with the distribution of \code{y} itself on top (and in a darker shade). When using \code{ppc_ecdf_overlay()} with discrete @@ -244,11 +279,14 @@ A., and Rubin, D. B. (2013). \emph{Bayesian Data Analysis.} Chapman & Hall/CRC Press, London, third edition. (Ch. 6) } \seealso{ -Other PPCs: \code{\link{PPC-censoring}}, - \code{\link{PPC-discrete}}, \code{\link{PPC-errors}}, - \code{\link{PPC-intervals}}, \code{\link{PPC-loo}}, - \code{\link{PPC-overview}}, - \code{\link{PPC-scatterplots}}, - \code{\link{PPC-test-statistics}} +Other PPCs: +\code{\link{PPC-censoring}}, +\code{\link{PPC-discrete}}, +\code{\link{PPC-errors}}, +\code{\link{PPC-intervals}}, +\code{\link{PPC-loo}}, +\code{\link{PPC-overview}}, +\code{\link{PPC-scatterplots}}, +\code{\link{PPC-test-statistics}} } \concept{PPCs} diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd index 0c58809a..1344c16c 100644 --- a/man/PPC-intervals.Rd +++ b/man/PPC-intervals.Rd @@ -3,6 +3,8 @@ \name{PPC-intervals} \alias{PPC-intervals} \alias{ppc_intervals} +\alias{ppc_ecdf_intervals} +\alias{ppc_ecdf_intervals_difference} \alias{ppc_intervals_grouped} \alias{ppc_ribbon} \alias{ppc_ribbon_grouped} @@ -21,6 +23,34 @@ ppc_intervals( fatten = 3 ) +ppc_ecdf_intervals( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) + +ppc_ecdf_intervals_difference( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) + ppc_intervals_grouped( y, yrep, @@ -101,6 +131,21 @@ is missing or \code{NULL}, then \code{1:length(y)} is used for the x-axis.} probability mass to include in the inner and outer intervals. The defaults are \code{prob=0.5} and \code{prob_outer=0.9}.} +\item{pit}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the +PIT values of one or more samples can be provided directly causing'y' and +'yrep' to be ignored.} + +\item{K, }{For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects +the granularity of the plot and can significantly speed up the computation +of the simultaneous confidence bands. Defaults to 'length(y)', +'K = ncol(yrep)', or lastly to 'K = ncol(pit)' depending on which one is +provided.} + +\item{alpha, size, fatten}{Arguments passed to geoms. For ribbon plots \code{alpha} +and \code{size} are passed to \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_ribbon()}}. For interval plots +\code{size} and \code{fatten} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}.} + \item{group}{A grouping variable (a vector or factor) the same length as \code{y}. Each value in \code{group} is interpreted as the group level pertaining to the corresponding value of \code{y}.} @@ -108,10 +153,6 @@ pertaining to the corresponding value of \code{y}.} \item{facet_args}{An optional list of arguments (other than \code{facets}) passed to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} to control faceting.} -\item{alpha, size, fatten}{Arguments passed to geoms. For ribbon plots \code{alpha} -and \code{size} are passed to \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_ribbon()}}. For interval plots -\code{size} and \code{fatten} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}.} - \item{y_draw}{For ribbon plots only, a string specifying how to draw \code{y}. Can be \code{"line"} (the default), \code{"points"}, or \code{"both"}.} } From d910940c0427904bcf34302906657a68b4518dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 16:42:59 +0300 Subject: [PATCH 30/48] Changed ppc_ecdf_intervals(_difference) to show confidence intervals as ribbon. --- R/helpers-ppc.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 3efa277a..27d4b27d 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -385,6 +385,8 @@ ecdf_intervals <- function(N, L, K, gamma) { lims$lower <- qhyper(gamma / 2, N, n, k) lims$upper <- qhyper(1 - gamma / 2, N, n, k) } + lims$lower <- c(rep(lims$lower[1:K], each=2), lims$lower[K + 1]) + lims$upper <- c(rep(lims$upper[1:K], each=2), lims$upper[K + 1]) lims } From c70124df41b99cb3949c4c565d39b1336db20c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 16:45:34 +0300 Subject: [PATCH 31/48] added handling of missing gamma. --- R/ppc-intervals.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 9c41c850..9a2dad8f 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -175,6 +175,14 @@ ppc_ecdf_intervals <- function( conf_level = prob_outer ) } + if (missing(gamma)) { + gamma <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob + ) + } limits <- ecdf_intervals( N = N, L = L, @@ -264,6 +272,14 @@ ppc_ecdf_intervals_difference <- function( conf_level = prob_outer ) } + if (missing(gamma)) { + gamma <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob + ) + } limits <- ecdf_intervals( N = N, L = L, From bd8b90b48e66f727408e8d27d857818d0ff5f75b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 16:56:26 +0300 Subject: [PATCH 32/48] ppc_ecdf_intervals_difference with fixed step ribbons. --- R/ppc-intervals.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 9a2dad8f..7db65e14 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -290,13 +290,13 @@ ppc_ecdf_intervals_difference <- function( L = L, K = K, gamma = gamma_outer) - z <- 0:K / K + z <- seq(0,1, length.out = K + 1) fig <- ggplot(data) + geom_ribbon( data = data.frame(limits_outer), aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N - c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N - c(0, rep(z[1:K], each = 2)), ymin = ~ lower / N - c(0, rep(z[2:(K + 1)], each = 2)) ), alpha = alpha, @@ -305,8 +305,8 @@ ppc_ecdf_intervals_difference <- function( data = data.frame(limits), aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N - c(0, rep(z[2:(K + 1)], each = 2)), - ymin = ~ lower / N - c(0, rep(z[2:(K + 1)], each = 2)) + ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) ), alpha = alpha, size = size) From 259fbc3ce12ee5363b18d22e1393e6b29ac5ac3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 30 Jun 2021 16:59:59 +0300 Subject: [PATCH 33/48] ppc_ecdf_intervals_difference more ribbon fixing. --- R/ppc-intervals.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 7db65e14..762d4183 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -296,8 +296,8 @@ ppc_ecdf_intervals_difference <- function( data = data.frame(limits_outer), aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N - c(0, rep(z[1:K], each = 2)), - ymin = ~ lower / N - c(0, rep(z[2:(K + 1)], each = 2)) + ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) ), alpha = alpha, size = size) + From 5b7131283096c0e03dd9587b26fd6cc99286490b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 14:43:14 +0300 Subject: [PATCH 34/48] Improve legends and colours of single ecdf plots. --- R/ppc-intervals.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 762d4183..78d2cda7 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -203,7 +203,8 @@ ppc_ecdf_intervals <- function( ymin = ~ lower / N ), alpha = alpha, - size = size) + + size = size, + colour = "theoretical CDF") + geom_ribbon( data = data.frame(limits), aes_( @@ -212,12 +213,14 @@ ppc_ecdf_intervals <- function( ymin = ~ lower / N ), alpha = alpha, - size = size) + size = size, + colour = "theoretical CDF") if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), aes_(x = z, y = ~ value), - size = size + size = size, + colour = "ECDF" ) } if (any(!data$is_y)) { @@ -300,7 +303,8 @@ ppc_ecdf_intervals_difference <- function( ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) ), alpha = alpha, - size = size) + + size = size, + colour = "theoretical CDF") + geom_ribbon( data = data.frame(limits), aes_( @@ -309,11 +313,13 @@ ppc_ecdf_intervals_difference <- function( ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) ), alpha = alpha, - size = size) + size = size, + colour = "theoretical CDF") if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z) + aes_(x = z, y = ~ value - z), + colour = "ECDF" ) } if (any(!data$is_y)) { From dbedd6f9f75ce1b6453006adfe62d35e1e9773a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:11:05 +0300 Subject: [PATCH 35/48] colour change to aesthetics --- R/ppc-intervals.R | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 78d2cda7..eb22ca46 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -200,27 +200,26 @@ ppc_ecdf_intervals <- function( aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, - ymin = ~ lower / N + ymin = ~ lower / N, + colour = "theoretical CDF" ), alpha = alpha, - size = size, - colour = "theoretical CDF") + + size = size) + geom_ribbon( data = data.frame(limits), aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, - ymin = ~ lower / N + ymin = ~ lower / N, + colour = "theoretical CDF" ), alpha = alpha, - size = size, - colour = "theoretical CDF") + size = size) if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value), - size = size, - colour = "ECDF" + aes_(x = z, y = ~ value, colour = "ECDF"), + size = size ) } if (any(!data$is_y)) { @@ -300,26 +299,25 @@ ppc_ecdf_intervals_difference <- function( aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), - ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), + colour = "theoretical CDF" ), alpha = alpha, - size = size, - colour = "theoretical CDF") + + size = size) + geom_ribbon( data = data.frame(limits), aes_( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), - ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1) + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), + colour = "theoretical CDF" ), alpha = alpha, - size = size, - colour = "theoretical CDF") + size = size) if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z), - colour = "ECDF" + aes_(x = z, y = ~ value - z, colour = "ECDF") ) } if (any(!data$is_y)) { From 94f29889b5c895b715f5ea438ddd04b339ada3e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:13:39 +0300 Subject: [PATCH 36/48] fill conf intervals --- R/ppc-intervals.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index eb22ca46..4999bcdf 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -201,7 +201,8 @@ ppc_ecdf_intervals <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, ymin = ~ lower / N, - colour = "theoretical CDF" + colour = "theoretical CDF", + fill = "theoretical CDF" ), alpha = alpha, size = size) + @@ -211,7 +212,8 @@ ppc_ecdf_intervals <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, ymin = ~ lower / N, - colour = "theoretical CDF" + colour = "theoretical CDF", + fill = "theoretical CDF" ), alpha = alpha, size = size) From e958d78b11c892ab79ad0a89a5ec0c8780cfb0db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:19:11 +0300 Subject: [PATCH 37/48] fill also for difference plot --- R/ppc-intervals.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 4999bcdf..1b788022 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -201,7 +201,7 @@ ppc_ecdf_intervals <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, ymin = ~ lower / N, - colour = "theoretical CDF", + color = "theoretical CDF", fill = "theoretical CDF" ), alpha = alpha, @@ -212,7 +212,7 @@ ppc_ecdf_intervals <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N, ymin = ~ lower / N, - colour = "theoretical CDF", + color = "theoretical CDF", fill = "theoretical CDF" ), alpha = alpha, @@ -220,7 +220,7 @@ ppc_ecdf_intervals <- function( if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value, colour = "ECDF"), + aes_(x = z, y = ~ value, color = "ECDF"), size = size ) } @@ -233,7 +233,7 @@ ppc_ecdf_intervals <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete() + + scale_color_discrete("") + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + @@ -302,7 +302,8 @@ ppc_ecdf_intervals_difference <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - colour = "theoretical CDF" + colour = "theoretical CDF", + fill = "theoretical CDF" ), alpha = alpha, size = size) + @@ -312,7 +313,8 @@ ppc_ecdf_intervals_difference <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - colour = "theoretical CDF" + colour = "theoretical CDF", + fill = "theoretical CDF" ), alpha = alpha, size = size) @@ -334,7 +336,7 @@ ppc_ecdf_intervals_difference <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete() + + scale_color_discrete("") + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + From fefaecb41cb91ffd03c8e9a1d10c7e743cdbddfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:28:12 +0300 Subject: [PATCH 38/48] try joining color and fill in legend. --- R/ppc-intervals.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 1b788022..274a3b2f 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -233,7 +233,12 @@ ppc_ecdf_intervals <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete("") + + scale_color_discrete( + name = "") + ) + + scale_fill_discrete( + name = "") + ) + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + @@ -336,7 +341,12 @@ ppc_ecdf_intervals_difference <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete("") + + scale_color_discrete( + name = "") + ) + + scale_fill_discrete( + name = "") + ) + yaxis_title(FALSE) + xaxis_title(FALSE) + yaxis_ticks(FALSE) + From fa9741be4543202d560d347f2896e45724567543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:29:23 +0300 Subject: [PATCH 39/48] try joining color and fill in legend. --- R/ppc-intervals.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 274a3b2f..d8cc8c3c 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -234,10 +234,10 @@ ppc_ecdf_intervals <- function( } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + scale_color_discrete( - name = "") + name = "" ) + scale_fill_discrete( - name = "") + name = "" ) + yaxis_title(FALSE) + xaxis_title(FALSE) + @@ -342,10 +342,10 @@ ppc_ecdf_intervals_difference <- function( } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + scale_color_discrete( - name = "") + name = "" ) + scale_fill_discrete( - name = "") + name = "" ) + yaxis_title(FALSE) + xaxis_title(FALSE) + From 5fde0d71c9afa4df7d405b65f85339c283e4107f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 1 Jul 2021 18:31:00 +0300 Subject: [PATCH 40/48] try joining color and fill in legend. --- R/ppc-intervals.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index d8cc8c3c..ca42caff 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -307,7 +307,7 @@ ppc_ecdf_intervals_difference <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - colour = "theoretical CDF", + color = "theoretical CDF", fill = "theoretical CDF" ), alpha = alpha, @@ -318,7 +318,7 @@ ppc_ecdf_intervals_difference <- function( x = c(0, rep(z[2:(K + 1)], each = 2)), ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - colour = "theoretical CDF", + color = "theoretical CDF", fill = "theoretical CDF" ), alpha = alpha, @@ -326,7 +326,7 @@ ppc_ecdf_intervals_difference <- function( if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z, colour = "ECDF") + aes_(x = z, y = ~ value - z, color = "ECDF") ) } if (any(!data$is_y)) { From 670817eb72b6304c2946cf808bd391c1122896a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Fri, 2 Jul 2021 12:28:56 +0300 Subject: [PATCH 41/48] typo in validate_pit --- R/helpers-ppc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 27d4b27d..2f2daec1 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -75,7 +75,7 @@ validate_yrep <- function(yrep, y, match_ncols = TRUE) { #' @return Either throws an error or returns a numeric matrix. #' @noRd validate_pit <- function(pit) { - stopifnot(is.matrix(pit), is.numeric(yrep)) + stopifnot(is.matrix(pit), is.numeric(pit)) if (any(pit < 0) || any(pit > 1)) { abort("'pit' values expected to lie between 0 and 1 (inclusive).") } From 1b0bdc8333d0599530a5b91511531f76b9efb70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Wed, 14 Jul 2021 00:22:43 +0300 Subject: [PATCH 42/48] Documentation and plot labels. --- R/helpers-ppc.R | 29 ++++++++++++++++++++++++++--- R/ppc-intervals.R | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 2f2daec1..9501820d 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -68,7 +68,7 @@ validate_yrep <- function(yrep, y, match_ncols = TRUE) { #' Validate PIT #' -#' Checks that the probability integral transformation (PIT) values from +#' Checks that the provided probability integral transformation (PIT) values are #' a numeric matrix with no NAs and that the provided values fall in [0,1]. #' #' @param pit The 'pit' object provided by the user. @@ -272,6 +272,7 @@ all_counts <- function(x, ...) { all_whole_number(x, ...) && min(x) >= 0 } + adjust_gamma <- function(N, L, K=N, conf_level=0.95) { if (any(c(K, N, L) < 1)) { abort("Parameters 'N', 'L' and 'K' must be positive integers.") @@ -288,6 +289,11 @@ adjust_gamma <- function(N, L, K=N, conf_level=0.95) { gamma } +# Adjust coverage parameter to find silmultaneous confidence intervals for the +# ECDF of a sample from the uniform distribution. +# N - length of samples +# K - number of equally spaced evaluation points, i.e. the right ends of the +# partition intervals. adjust_gamma_optimize <- function(N, K, conf_level=0.95) { target <- function(gamma, conf_level, N, K) { z <- 1:(K - 1) / K @@ -316,6 +322,13 @@ adjust_gamma_optimize <- function(N, K, conf_level=0.95) { optimize(target, c(0, 1 - conf_level), conf_level, N = N, K = K)$minimum } +# Adjust coverage parameter to find silmultaneous confidence intervals for the +# ECDFs of multiple samples (chains) from the uniform distribution. +# N - length of samples (chains). +# L - number of samples (chains). +# K - number of equally spaced evaluation points, i.e. the right ends of the +# partition intervals. +# M - number of simulations used to determine the 'conf_level' middle quantile. adjust_gamma_simulate <-function(N, L, K, conf_level=0.95, M=5000) { gamma <- numeric(M) z <- (1:(K - 1)) / K @@ -360,7 +373,9 @@ p_interior <- function(p_int, x1, x2, z1, z2, gamma, N) { list(p_int = rowSums(p_x2_int), x1 = x2) } -# alpha percent of the trials are allowed to be rejected +# 100 * `alpha` percent of the trials are allowed to be rejected. +# In case of ties, return the largest value dominating at most +# 100 * (alpha + tol) percent of the values. alpha_quantile <- function(gamma, alpha, tol = 0.001) { a <- unname(quantile(gamma, probs = alpha)) a_tol <- unname(quantile(gamma, probs = alpha + tol)) @@ -373,6 +388,13 @@ alpha_quantile <- function(gamma, alpha, tol = 0.001) { a } +# Compute simultaneous confidence intervals for one or more samples from the +# standard uniform distribution. +# N - sample length +# L - number of samples +# K - size of uniform partition defining the ECDF evaluation points. +# gamma - coverage parameter for the marginal distribution (binomial for +# one sample and hypergeometric for multiple rank transformed chains). ecdf_intervals <- function(N, L, K, gamma) { lims <- list() z <- seq(0,1, length.out = K + 1) @@ -395,7 +417,8 @@ u_scale <- function(x) { array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x)) } - +# for each value in 'y', compute the fractional ranks (empirical pit values) +# with respect to 'yrep'. empirical_pit <- function(y, yrep) { apply(outer(yrep, y, "<="), 3, sum) / length(yrep) } diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index ca42caff..0c185527 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -233,11 +233,22 @@ ppc_ecdf_intervals <- function( ) } fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete( - name = "" + scale_color_manual( + name = "", + values = set_names( + get_color(c("lh", "dh")), + c("theoretical CDF", "ECDF")), + labels = set_names( + c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), + c("ECDF", "theoretical ECDF")) ) + - scale_fill_discrete( - name = "" + scale_fill_manual( + name = "", + values = c(yrep = get_color("l"), + y = NA), + labels = set_names( + c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), + c("ECDF", "theoretical ECDF")) ) + yaxis_title(FALSE) + xaxis_title(FALSE) + @@ -340,12 +351,21 @@ ppc_ecdf_intervals_difference <- function( ) ) } - fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_discrete( - name = "" + fig + + scale_color_manual( + name = "", + values = set_names(get_color(c("lh", "dh")), c("theoretical CDF", "ECDF")), + labels = set_names( + c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), + c("ECDF", "theoretical ECDF")) ) + - scale_fill_discrete( - name = "" + scale_fill_manual( + name = "", + values = c(yrep = get_color("l"), + y = NA), + labels = set_names( + c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), + c("ECDF", "theoretical ECDF")) ) + yaxis_title(FALSE) + xaxis_title(FALSE) + From d2c7b676957c49bdafbc853c645099ec3013d2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Thu, 15 Jul 2021 11:21:28 +0300 Subject: [PATCH 43/48] legent labels fix --- R/ppc-intervals.R | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 0c185527..f8ae43eb 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -238,17 +238,19 @@ ppc_ecdf_intervals <- function( values = set_names( get_color(c("lh", "dh")), c("theoretical CDF", "ECDF")), - labels = set_names( - c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), - c("ECDF", "theoretical ECDF")) + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "ECDF" = expression(italic("ECDF")) + ) ) + scale_fill_manual( name = "", - values = c(yrep = get_color("l"), - y = NA), - labels = set_names( - c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), - c("ECDF", "theoretical ECDF")) + values = c("theoretical CDF" = get_color("l"), + "ECDF" = NA), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "ECDF" = expression(italic("ECDF")) + ) ) + yaxis_title(FALSE) + xaxis_title(FALSE) + @@ -354,18 +356,22 @@ ppc_ecdf_intervals_difference <- function( fig + scale_color_manual( name = "", - values = set_names(get_color(c("lh", "dh")), c("theoretical CDF", "ECDF")), - labels = set_names( - c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), - c("ECDF", "theoretical ECDF")) + values = set_names( + get_color(c("lh", "dh")), + c("theoretical CDF", "ECDF")), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "ECDF" = expression(italic("ECDF")) + ) ) + scale_fill_manual( name = "", - values = c(yrep = get_color("l"), - y = NA), - labels = set_names( - c(expression(italic(ECDF)), expression(italic("theoretical CDF"))), - c("ECDF", "theoretical ECDF")) + values = c("theoretical CDF" = get_color("l"), + "ECDF" = NA), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "ECDF" = expression(italic("ECDF")) + ) ) + yaxis_title(FALSE) + xaxis_title(FALSE) + From 622200796edef384a66b4fcadb466db67a6c93ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Thu, 15 Jul 2021 12:23:19 +0300 Subject: [PATCH 44/48] Documentation and examples. --- R/ppc-intervals.R | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index f8ae43eb..b981ef3f 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -41,6 +41,16 @@ #' Same as `ppc_intervals()` and `ppc_ribbon()`, respectively, but a #' separate plot (facet) is generated for each level of a grouping variable. #' } +#' \item{`ppc_ecdf_intervals(), ppc_ecdf_intervals_difference()`}{ +#' `100*prob`% central simultaneous confidence intervals for the ECDF of the +#' probability integral transformed sample at `K` evenly spaced evaluation +#' points. If `y` and `yrep` are provided, the PIT values of `y` are computed +#' with regards to `yrep`. PIT values can also be provided directly as `pit`. +#' `ppc_ecdf_intervals()' plots the ECDF with the desired simultaneous +#' confidence intervals, whereas `ppc_ecdf_intervals_difference()` in +#' addition substracts the value of the theoretical CDF from the ECDF at each +#' evaluation point, resulting in a more dynamic range especially for large +#' samples. #' } #' #' @examples @@ -50,6 +60,8 @@ #' color_scheme_set("brightblue") #' ppc_ribbon(y, yrep) #' ppc_intervals(y, yrep) +#' ppc_ecdf_intervals(y, yrep) +#' ppc_ecdf_intervals_difference(y, yrep) #' #' # change x axis to y values (instead of indices) and add x = y line #' ppc_intervals(y, yrep, x = y) + abline_01() @@ -159,6 +171,7 @@ ppc_ecdf_intervals <- function( if (missing(K)) { K <- max(data$y_id) - 1 } + # Infer sample length if (!missing(y)) { N <- length(y) } else if (!missing(yrep)) { @@ -166,7 +179,9 @@ ppc_ecdf_intervals <- function( } else { N <- ncol(pit) } + # Infer number of chains L <- max(data$rep_id) + any(data$is_y) + # Adjust confidence intervals for simultaneous coverage if (missing(gamma_outer)) { gamma_outer <- adjust_gamma( N = N, @@ -183,6 +198,7 @@ ppc_ecdf_intervals <- function( conf_level = prob ) } + # Compute limits limits <- ecdf_intervals( N = N, L = L, @@ -193,7 +209,9 @@ ppc_ecdf_intervals <- function( L = L, K = K, gamma = gamma_outer) + # determine evaluation points. z <- 0:K / K + # construct figure fig <- ggplot(data) + geom_ribbon( data = data.frame(limits_outer), @@ -217,17 +235,19 @@ ppc_ecdf_intervals <- function( ), alpha = alpha, size = size) + # add sample ECDF if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value, color = "ECDF"), + aes_(x = z, y = ~ value, color = "sample ECDF"), size = size ) } + # add if (any(!data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, each = L - any(data$is_y)), group = ~ rep_id, + aes_(x = rep(z, each = L), group = ~ rep_id, y = ~ value, color = ~ rep_label), size = size ) @@ -237,19 +257,19 @@ ppc_ecdf_intervals <- function( name = "", values = set_names( get_color(c("lh", "dh")), - c("theoretical CDF", "ECDF")), + c("theoretical CDF", "sample ECDF")), labels = c( "theoretical CDF" = expression(italic("theoretical CDF")), - "ECDF" = expression(italic("ECDF")) + "sample ECDF" = expression(italic("sample ECDF")) ) ) + scale_fill_manual( name = "", values = c("theoretical CDF" = get_color("l"), - "ECDF" = NA), + "sample ECDF" = NA), labels = c( "theoretical CDF" = expression(italic("theoretical CDF")), - "ECDF" = expression(italic("ECDF")) + "sample ECDF" = expression(italic("sample ECDF")) ) ) + yaxis_title(FALSE) + @@ -339,15 +359,15 @@ ppc_ecdf_intervals_difference <- function( if (any(data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z, color = "ECDF") + aes_(x = z, y = ~ value - z, color = "sample ECDF") ) } if (any(!data$is_y)) { fig <- fig + geom_step( data = function(x) dplyr::filter(x, !.data$is_y), aes_( - x = rep(z, each = L - any(data$is_y)), - y = ~ value - rep(z, each = L - any(data$is_y)), + x = rep(z, each = L)), + y = ~ value - rep(z, each = L), group = ~ rep_id, color = ~ rep_label ) @@ -358,19 +378,19 @@ ppc_ecdf_intervals_difference <- function( name = "", values = set_names( get_color(c("lh", "dh")), - c("theoretical CDF", "ECDF")), + c("theoretical CDF", "sample ECDF")), labels = c( "theoretical CDF" = expression(italic("theoretical CDF")), - "ECDF" = expression(italic("ECDF")) + "sample ECDF" = expression(italic("sample ECDF")) ) ) + scale_fill_manual( name = "", values = c("theoretical CDF" = get_color("l"), - "ECDF" = NA), + "sample ECDF" = NA), labels = c( "theoretical CDF" = expression(italic("theoretical CDF")), - "ECDF" = expression(italic("ECDF")) + "sample ECDF" = expression(italic("sample ECDF")) ) ) + yaxis_title(FALSE) + From 2aa0737259114aa60864c79787e96eb3d10c437d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Thu, 15 Jul 2021 12:33:34 +0300 Subject: [PATCH 45/48] cleaned ppc-distributions.R and fixed documentation from ppc-intervals.R --- R/ppc-distributions.R | 5 ++++- R/ppc-intervals.R | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index 14fe9970..0cc24c7d 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -127,6 +127,7 @@ ppc_data <- function(y, yrep, group = NULL) { } + #' @rdname PPC-distributions #' @export ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, @@ -159,6 +160,7 @@ ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, #' @export #' @param notch A logical scalar passed to [ggplot2::geom_boxplot()]. #' Unlike for `geom_boxplot()`, the default is `notch=TRUE`. +#' ppc_boxplot <- function(y, yrep, ..., notch = TRUE, size = 0.5, alpha = 1) { check_ignored_arguments(...) data <- ppc_data(y, yrep) @@ -215,7 +217,7 @@ ppc_freqpoly <- function(y, yrep, ..., #' @rdname PPC-distributions #' @export #' @template args-group - +#' ppc_freqpoly_grouped <- function(y, yrep, group, ..., binwidth = NULL, freq = TRUE, size = 0.25, alpha = 1) { check_ignored_arguments(...) @@ -455,6 +457,7 @@ ppc_ecdf_overlay_grouped <- function( } + #' @export #' @rdname PPC-distributions #' @param probs A numeric vector passed to [ggplot2::geom_violin()]'s diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index b981ef3f..9d199a60 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -147,7 +147,7 @@ ppc_intervals <- function(y, yrep, x = NULL, ..., prob = 0.5, prob_outer = 0.9, #' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the #' PIT values of one or more samples can be provided directly causing'y' and #' 'yrep' to be ignored. -#' @param K, For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +#' @param K For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', #' number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects #' the granularity of the plot and can significantly speed up the computation #' of the simultaneous confidence bands. Defaults to 'length(y)', From 10dd20778eca8933631c53f9a86fe819fc1b40e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Thu, 15 Jul 2021 12:40:19 +0300 Subject: [PATCH 46/48] typo in ppc-intervals.R, files roxygenised. --- R/ppc-intervals.R | 1 - man/PPC-intervals.Rd | 13 ++++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 9d199a60..18ff9e2d 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -371,7 +371,6 @@ ppc_ecdf_intervals_difference <- function( group = ~ rep_id, color = ~ rep_label ) - ) } fig + scale_color_manual( diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd index 1344c16c..5deaf757 100644 --- a/man/PPC-intervals.Rd +++ b/man/PPC-intervals.Rd @@ -135,7 +135,7 @@ are \code{prob=0.5} and \code{prob_outer=0.9}.} PIT values of one or more samples can be provided directly causing'y' and 'yrep' to be ignored.} -\item{K, }{For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +\item{K}{For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects the granularity of the plot and can significantly speed up the computation of the simultaneous confidence bands. Defaults to 'length(y)', @@ -186,6 +186,15 @@ plots may be easier to read than the other. Same as \code{ppc_intervals()} and \code{ppc_ribbon()}, respectively, but a separate plot (facet) is generated for each level of a grouping variable. } +\item{\verb{ppc_ecdf_intervals(), ppc_ecdf_intervals_difference()}}{ +\code{100*prob}\% central simultaneous confidence intervals for the ECDF of the +probability integral transformed sample at \code{K} evenly spaced evaluation +points. If \code{y} and \code{yrep} are provided, the PIT values of \code{y} are computed +with regards to \code{yrep}. PIT values can also be provided directly as \code{pit}. +\verb{ppc_ecdf_intervals()' plots the ECDF with the desired simultaneous confidence intervals, whereas }ppc_ecdf_intervals_difference()` in +addition substracts the value of the theoretical CDF from the ECDF at each +evaluation point, resulting in a more dynamic range especially for large +samples. } } @@ -196,6 +205,8 @@ yrep <- matrix(rnorm(5000, 0, 2), ncol = 50) color_scheme_set("brightblue") ppc_ribbon(y, yrep) ppc_intervals(y, yrep) +ppc_ecdf_intervals(y, yrep) +ppc_ecdf_intervals_difference(y, yrep) # change x axis to y values (instead of indices) and add x = y line ppc_intervals(y, yrep, x = y) + abline_01() From d0e3d04032df41789811869263fdf855972dda2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A4ilynoja=20Teemu?= Date: Mon, 19 Jul 2021 10:49:13 +0300 Subject: [PATCH 47/48] ranks in empirical_pit start from 1 instead of 0. --- R/helpers-ppc.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index 9501820d..a985b318 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -412,15 +412,15 @@ ecdf_intervals <- function(N, L, K, gamma) { lims } -# Transform observations in 'x' into their corresponding fractional ranks. +#' Transform observations in 'x' into their corresponding fractional ranks. u_scale <- function(x) { array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x)) } -# for each value in 'y', compute the fractional ranks (empirical pit values) -# with respect to 'yrep'. +#' For each value in 'y', estimate its probability integral transformation with +#' respect to 'yrep' through its fractional rank. empirical_pit <- function(y, yrep) { - apply(outer(yrep, y, "<="), 3, sum) / length(yrep) + (1 + apply(outer(yrep, y, "<="), 3, sum)) / (1 + length(yrep)) } # labels ---------------------------------------------------------------- From f4f87d3e79be3c5f5db22e171d4e62f0bcac4a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Mon, 18 Oct 2021 22:18:07 +0300 Subject: [PATCH 48/48] Fixed misaligned aesthetics in ppc_ecdf_intervals_difference --- R/ppc-intervals.R | 1464 ++++++++++++++++++++++----------------------- 1 file changed, 732 insertions(+), 732 deletions(-) diff --git a/R/ppc-intervals.R b/R/ppc-intervals.R index 18ff9e2d..b70a720f 100644 --- a/R/ppc-intervals.R +++ b/R/ppc-intervals.R @@ -1,732 +1,732 @@ -#' PPC intervals -#' -#' Medians and central interval estimates of `yrep` with `y` overlaid. -#' See the **Plot Descriptions** section, below. -#' -#' @name PPC-intervals -#' @family PPCs -#' -#' @template args-y-yrep -#' @template args-prob-prob_outer -#' @param x A numeric vector the same length as `y` to use as the x-axis -#' variable. For example, `x` could be a predictor variable from a -#' regression model, a time variable for time-series models, etc. If `x` -#' is missing or `NULL`, then `1:length(y)` is used for the x-axis. -#' @param ... Currently unused. -#' @param alpha,size,fatten Arguments passed to geoms. For ribbon plots `alpha` -#' and `size` are passed to [ggplot2::geom_ribbon()]. For interval plots -#' `size` and `fatten` are passed to [ggplot2::geom_pointrange()]. -#' -#' @template return-ggplot-or-data -#' -#' @templateVar bdaRef (Ch. 6) -#' @template reference-bda -#' -#' @section Plot Descriptions: -#' \describe{ -#' \item{`ppc_intervals(), ppc_ribbon()`}{ -#' `100*prob`% central intervals for `yrep` at each `x` -#' value. `ppc_intervals()` plots intervals as vertical bars with points -#' indicating `yrep` medians and darker points indicating observed -#' `y` values. `ppc_ribbon()` plots a ribbon of connected intervals -#' with a line through the median of `yrep` and a darker line connecting -#' observed `y` values. In both cases an optional `x` variable can -#' also be specified for the x-axis variable. -#' -#' Depending on the number of observations and the variability in the -#' predictions at different values of `x`, one or the other of these -#' plots may be easier to read than the other. -#' } -#' \item{`ppc_intervals_grouped(), ppc_ribbon_grouped()`}{ -#' Same as `ppc_intervals()` and `ppc_ribbon()`, respectively, but a -#' separate plot (facet) is generated for each level of a grouping variable. -#' } -#' \item{`ppc_ecdf_intervals(), ppc_ecdf_intervals_difference()`}{ -#' `100*prob`% central simultaneous confidence intervals for the ECDF of the -#' probability integral transformed sample at `K` evenly spaced evaluation -#' points. If `y` and `yrep` are provided, the PIT values of `y` are computed -#' with regards to `yrep`. PIT values can also be provided directly as `pit`. -#' `ppc_ecdf_intervals()' plots the ECDF with the desired simultaneous -#' confidence intervals, whereas `ppc_ecdf_intervals_difference()` in -#' addition substracts the value of the theoretical CDF from the ECDF at each -#' evaluation point, resulting in a more dynamic range especially for large -#' samples. -#' } -#' -#' @examples -#' y <- rnorm(50) -#' yrep <- matrix(rnorm(5000, 0, 2), ncol = 50) -#' -#' color_scheme_set("brightblue") -#' ppc_ribbon(y, yrep) -#' ppc_intervals(y, yrep) -#' ppc_ecdf_intervals(y, yrep) -#' ppc_ecdf_intervals_difference(y, yrep) -#' -#' # change x axis to y values (instead of indices) and add x = y line -#' ppc_intervals(y, yrep, x = y) + abline_01() -#' -#' -#' color_scheme_set("teal") -#' year <- 1950:1999 -#' ppc_ribbon(y, yrep, x = year, alpha = 0, size = 0.75) + ggplot2::xlab("Year") -#' -#' color_scheme_set("pink") -#' year <- rep(2000:2009, each = 5) -#' group <- gl(5, 1, length = 50, labels = LETTERS[1:5]) -#' ppc_ribbon_grouped(y, yrep, x = year, group) + -#' ggplot2::scale_x_continuous(breaks = pretty) -#' -#' ppc_ribbon_grouped( -#' y, yrep, x = year, group, -#' facet_args = list(scales = "fixed"), -#' alpha = 1, -#' size = 2 -#' ) + -#' xaxis_text(FALSE) + -#' xaxis_ticks(FALSE) + -#' panel_bg(fill = "gray20") -#' -#' ppc_dat <- ppc_intervals_data(y, yrep, x = year, prob = 0.5) -#' ppc_group_dat <- ppc_intervals_data(y, yrep, x = year, group = group, prob = 0.5) -#' -#' \dontrun{ -#' library("rstanarm") -#' fit <- stan_glmer(mpg ~ wt + (1|cyl), data = mtcars, refresh = 0) -#' yrep <- posterior_predict(fit) -#' -#' color_scheme_set("purple") -#' with(mtcars, ppc_intervals(mpg, yrep, x = wt, prob = 0.5)) + -#' panel_bg(fill="gray90", color = NA) + -#' grid_lines(color = "white") -#' -#' ppc_intervals_grouped(y = mtcars$mpg, yrep, prob = 0.8, -#' x = mtcars$wt, group = mtcars$cyl) -#' -#' -#' color_scheme_set("gray") -#' ppc_intervals(mtcars$mpg, yrep, prob = 0.5) + -#' ggplot2::scale_x_continuous( -#' labels = rownames(mtcars), -#' breaks = 1:nrow(mtcars) -#' ) + -#' xaxis_text(angle = -70, vjust = 1, hjust = 0) -#' -#' } -#' -#' -NULL - -#' @rdname PPC-intervals -#' @export -ppc_intervals <- function(y, yrep, x = NULL, ..., prob = 0.5, prob_outer = 0.9, - size = 1, fatten = 3) { - check_ignored_arguments(...) - - data <- ppc_intervals_data( - y = y, - yrep = yrep, - x = x, - group = NULL, - prob = prob, - prob_outer = prob_outer - ) - - .ppc_intervals( - data = data, - size = size, - fatten = fatten, - grouped = FALSE, - style = "intervals", - x_lab = label_x(x) - ) -} - -#' @export -#' @rdname PPC-intervals -#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the -#' PIT values of one or more samples can be provided directly causing'y' and -#' 'yrep' to be ignored. -#' @param K For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', -#' number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects -#' the granularity of the plot and can significantly speed up the computation -#' of the simultaneous confidence bands. Defaults to 'length(y)', -#' 'K = ncol(yrep)', or lastly to 'K = ncol(pit)' depending on which one is -#' provided. -ppc_ecdf_intervals <- function( - y, - yrep, - pit, - gamma, - gamma_outer, - K, - ..., - prob = 0.5, - prob_outer = 0.9, - size = 1, - alpha = 0.33 -) { - check_ignored_arguments(...) - data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) - if (missing(K)) { - K <- max(data$y_id) - 1 - } - # Infer sample length - if (!missing(y)) { - N <- length(y) - } else if (!missing(yrep)) { - N <- ncol(yrep) - } else { - N <- ncol(pit) - } - # Infer number of chains - L <- max(data$rep_id) + any(data$is_y) - # Adjust confidence intervals for simultaneous coverage - if (missing(gamma_outer)) { - gamma_outer <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = prob_outer - ) - } - if (missing(gamma)) { - gamma <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = prob - ) - } - # Compute limits - limits <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma) - limits_outer <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma_outer) - # determine evaluation points. - z <- 0:K / K - # construct figure - fig <- ggplot(data) + - geom_ribbon( - data = data.frame(limits_outer), - aes_( - x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N, - ymin = ~ lower / N, - color = "theoretical CDF", - fill = "theoretical CDF" - ), - alpha = alpha, - size = size) + - geom_ribbon( - data = data.frame(limits), - aes_( - x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N, - ymin = ~ lower / N, - color = "theoretical CDF", - fill = "theoretical CDF" - ), - alpha = alpha, - size = size) - # add sample ECDF - if (any(data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value, color = "sample ECDF"), - size = size - ) - } - # add - if (any(!data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, !.data$is_y), - aes_(x = rep(z, each = L), group = ~ rep_id, - y = ~ value, color = ~ rep_label), - size = size - ) - } - fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + - scale_color_manual( - name = "", - values = set_names( - get_color(c("lh", "dh")), - c("theoretical CDF", "sample ECDF")), - labels = c( - "theoretical CDF" = expression(italic("theoretical CDF")), - "sample ECDF" = expression(italic("sample ECDF")) - ) - ) + - scale_fill_manual( - name = "", - values = c("theoretical CDF" = get_color("l"), - "sample ECDF" = NA), - labels = c( - "theoretical CDF" = expression(italic("theoretical CDF")), - "sample ECDF" = expression(italic("sample ECDF")) - ) - ) + - yaxis_title(FALSE) + - xaxis_title(FALSE) + - yaxis_ticks(FALSE) + - bayesplot_theme_get() -} - -#' @export -#' @rdname PPC-intervals -ppc_ecdf_intervals_difference <- function( - y, - yrep, - pit, - gamma, - gamma_outer, - K, - ..., - prob = 0.5, - prob_outer = 0.9, - size = 1, - alpha = 0.33 -) { - check_ignored_arguments(...) - data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) - if (missing(K)) { - K <- max(data$y_id) - 1 - } - if (!missing(y)) { - N <- length(y) - } else if (!missing(yrep)) { - N <- ncol(yrep) - } else { - N <- ncol(pit) - } - L <- max(data$rep_id) + any(data$is_y) - if (missing(gamma_outer)) { - gamma_outer <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = prob_outer - ) - } - if (missing(gamma)) { - gamma <- adjust_gamma( - N = N, - L = L, - K = K, - conf_level = prob - ) - } - limits <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma) - limits_outer <- ecdf_intervals( - N = N, - L = L, - K = K, - gamma = gamma_outer) - z <- seq(0,1, length.out = K + 1) - fig <- ggplot(data) + - geom_ribbon( - data = data.frame(limits_outer), - aes_( - x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), - ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - color = "theoretical CDF", - fill = "theoretical CDF" - ), - alpha = alpha, - size = size) + - geom_ribbon( - data = data.frame(limits), - aes_( - x = c(0, rep(z[2:(K + 1)], each = 2)), - ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), - ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), - color = "theoretical CDF", - fill = "theoretical CDF" - ), - alpha = alpha, - size = size) - if (any(data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, .data$is_y), - aes_(x = z, y = ~ value - z, color = "sample ECDF") - ) - } - if (any(!data$is_y)) { - fig <- fig + geom_step( - data = function(x) dplyr::filter(x, !.data$is_y), - aes_( - x = rep(z, each = L)), - y = ~ value - rep(z, each = L), - group = ~ rep_id, - color = ~ rep_label - ) - } - fig + - scale_color_manual( - name = "", - values = set_names( - get_color(c("lh", "dh")), - c("theoretical CDF", "sample ECDF")), - labels = c( - "theoretical CDF" = expression(italic("theoretical CDF")), - "sample ECDF" = expression(italic("sample ECDF")) - ) - ) + - scale_fill_manual( - name = "", - values = c("theoretical CDF" = get_color("l"), - "sample ECDF" = NA), - labels = c( - "theoretical CDF" = expression(italic("theoretical CDF")), - "sample ECDF" = expression(italic("sample ECDF")) - ) - ) + - yaxis_title(FALSE) + - xaxis_title(FALSE) + - yaxis_ticks(FALSE) + - bayesplot_theme_get() -} - -#' @rdname PPC-intervals -#' @export -#' @template args-group -#' @param facet_args An optional list of arguments (other than `facets`) -#' passed to [ggplot2::facet_wrap()] to control faceting. -#' -ppc_intervals_grouped <- function(y, - yrep, - x = NULL, - group, - ..., - facet_args = list(), - prob = 0.5, - prob_outer = 0.9, - size = 1, - fatten = 3) { - check_ignored_arguments(...) - - data <- ppc_intervals_data( - y = y, - yrep = yrep, - x = x, - group = group, - prob = prob, - prob_outer = prob_outer - ) - - facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" - - .ppc_intervals( - data = data, - facet_args = facet_args, - size = size, - fatten = fatten, - grouped = TRUE, - style = "intervals", - x_lab = label_x(x) - ) -} - - -#' @rdname PPC-intervals -#' @export -#' @param y_draw For ribbon plots only, a string specifying how to draw `y`. Can -#' be `"line"` (the default), `"points"`, or `"both"`. -ppc_ribbon <- function(y, - yrep, - x = NULL, - ..., - prob = 0.5, - prob_outer = 0.9, - alpha = 0.33, - size = 0.25, - y_draw = c("line", "points", "both")) { - check_ignored_arguments(...) - - data <- ppc_intervals_data( - y = y, - yrep = yrep, - x = x, - group = NULL, - prob = prob - ) - - .ppc_intervals( - data = data, - alpha = alpha, - size = size, - grouped = FALSE, - style = "ribbon", - x_lab = label_x(x), - y_draw = y_draw - ) -} - - -#' @export -#' @rdname PPC-intervals -ppc_ribbon_grouped <- function(y, - yrep, - x = NULL, - group, - ..., - facet_args = list(), - prob = 0.5, - prob_outer = 0.9, - alpha = 0.33, - size = 0.25, - y_draw = c("line", "points", "both")) { - check_ignored_arguments(...) - - data <- ppc_intervals_data( - y = y, - yrep = yrep, - x = x, - group = group, - prob = prob, - prob_outer = prob_outer - ) - - facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" - - .ppc_intervals( - data = data, - facet_args = facet_args, - alpha = alpha, - size = size, - grouped = TRUE, - style = "ribbon", - x_lab = label_x(x), - y_draw = y_draw - ) -} - - -#' @rdname PPC-intervals -#' @export -ppc_intervals_data <- function(y, yrep, x = NULL, group = NULL, - ..., prob = 0.5, prob_outer = 0.9) { - check_ignored_arguments(...) - .ppc_intervals_data(y = y, yrep = yrep, x = x, group = group, - prob = prob, prob_outer = prob_outer) -} - - -#' @rdname PPC-intervals -#' @export -ppc_ribbon_data <- ppc_intervals_data - - - - -# internal ---------------------------------------------------------------- -label_x <- function(x) { - if (missing(x)) "Index" else NULL -} - -.ppc_intervals_data <- function(y, yrep, x = NULL, group = NULL, - prob = 0.5, prob_outer = 0.9) { - grouped <- !is.null(group) - stopifnot(prob > 0 && prob < 1) - stopifnot(prob_outer > 0 && prob_outer <= 1) - probs <- sort(c(prob, prob_outer)) - prob <- probs[1] - prob_outer <- probs[2] - - y <- validate_y(y) - yrep <- validate_yrep(yrep, y) - x <- validate_x(x, y) - - long_d <- melt_and_stack(y, yrep) - long_d$x <- x[long_d$y_id] - long_d$y_obs <- y[long_d$y_id] - - molten_reps <- long_d[!as.logical(long_d[["is_y"]]), , drop = FALSE] - molten_reps$is_y <- NULL - - if (grouped) { - group <- validate_group(group, y) - molten_reps$group <- group[molten_reps$y_id] - group_vars <- syms(c("y_id", "y_obs", "group", "x")) - } else { - group_vars <- syms(c("y_id", "y_obs", "x")) - } - - grouped_d <- dplyr::group_by(molten_reps, !!! group_vars) - alpha <- (1 - probs) / 2 - probs <- sort(c(alpha, 0.5, 1 - alpha)) - - val_col <- sym("value") - dplyr::ungroup(dplyr::summarise( - grouped_d, - outer_width = prob_outer, - inner_width = prob, - ll = unname(quantile(!! val_col, probs = probs[1])), - l = unname(quantile(!! val_col, probs = probs[2])), - m = unname(quantile(!! val_col, probs = probs[3])), - h = unname(quantile(!! val_col, probs = probs[4])), - hh = unname(quantile(!! val_col, probs = probs[5])) - )) -} - -.ppc_ecdf_intervals_data <- function(y, yrep, pit, K) { - if (missing(K)){ - if (!missing(y)) { - K <- length(y) - } - else if (!missing(pit)) { - K <- ncol(pit) - } - else { - K <- ncol(yrep) - } - } - z <- seq(0,1, length.out = K + 1) - if (!missing(pit)) { - pit <- validate_pit(pit) - ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - # work around to adhere to the melt_and_stack-format with only "yrep". - ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) - ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) - } else if (missing(y)) { - pit <- u_scale(validate_yrep(yrep, yrep[1, ], match_ncols = FALSE)) - if (nrow(yrep) < 2) { - abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", - "multiple rows to allow for sample comparison.", sep="")) - } - ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) - # work around to adhere to the melt_and_stack-format with only "yrep". - ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) - ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) - } else { - y <- validate_y(y) - yrep <- validate_yrep(yrep, y, match_ncols = FALSE) - pit <- empirical_pit(y, yrep) - ecdfs <- ecdf(pit)(z) - # work around to adhere to the melt_and_stack-format with only "y" . - ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) - ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) - } - ecdfs$rep_id[is.na(ecdfs$rep_id)] <- 0 - ecdfs -} - - -# Make intervals or ribbon plot -# -# @param data The object returned by .ppc_intervals_data -# @return A ggplot object -# -.ppc_intervals <- - function(data, - facet_args = list(), - alpha = 0.33, - fatten = 3, - size = 1, - grouped = FALSE, - style = c("intervals", "ribbon"), - x_lab = NULL, - y_draw = c("line", "points", "both")) { - - style <- match.arg(style) - y_draw <- match.arg(y_draw) - - graph <- ggplot( - data = data, - mapping = aes_( - x = ~ x, - y = ~ m, - ymin = ~ l, - ymax = ~ h - ) - ) - - if (style == "ribbon") { - graph <- graph + - geom_ribbon( - aes_(color = "yrep", fill = "yrep", ymin = ~ ll, ymax = ~ hh), - alpha = alpha, - size = size - ) + - geom_ribbon( - aes_(color = "yrep", fill = "yrep"), - alpha = alpha, - size = size - ) + - geom_line( - aes_(color = "yrep"), - size = size/2 - ) + - geom_blank(aes_(fill = "y")) - - if (y_draw == "line" || y_draw == "both") { - graph <- graph + geom_line( - aes_(y = ~ y_obs, color = "y"), - size = 0.5 - ) - } - - if (y_draw == "points" || y_draw == "both") { - graph <- graph + geom_point( - mapping = aes_(y = ~ y_obs, color = "y", fill = "y"), - shape = 21, - size = 1.5 - ) - } - } else { - graph <- graph + - geom_pointrange( - mapping = aes_(color = "yrep", fill = "yrep", ymin = ~ ll, ymax = ~ hh), - shape = 21, - alpha = alpha, - size = size, - fatten = fatten - ) + - geom_pointrange( - mapping = aes_(color = "yrep", fill = "yrep"), - shape = 21, - size = size, - fatten = fatten - ) + - geom_point( - mapping = aes_(y = ~ y_obs, color = "y", fill = "y"), - shape = 21, - size = 1.5 - ) - } - - graph <- graph + - scale_color_manual( - name = "", - values = set_names(get_color(c("lh", "dh")), c("yrep", "y")), - labels = c(yrep = yrep_label(), y = y_label()) - ) + - scale_fill_manual( - name = "", - values = c(yrep = get_color("l"), - y = if (style == "ribbon") NA else get_color("d")), - labels = c(yrep = yrep_label(), y = y_label()) - ) - - - if (grouped) { - facet_args[["facets"]] <- "group" - facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" - graph <- graph + do.call("facet_wrap", facet_args) - } - - graph + - labs(y = NULL, x = x_lab %||% expression(italic(x))) + - bayesplot_theme_get() -} +#' PPC intervals +#' +#' Medians and central interval estimates of `yrep` with `y` overlaid. +#' See the **Plot Descriptions** section, below. +#' +#' @name PPC-intervals +#' @family PPCs +#' +#' @template args-y-yrep +#' @template args-prob-prob_outer +#' @param x A numeric vector the same length as `y` to use as the x-axis +#' variable. For example, `x` could be a predictor variable from a +#' regression model, a time variable for time-series models, etc. If `x` +#' is missing or `NULL`, then `1:length(y)` is used for the x-axis. +#' @param ... Currently unused. +#' @param alpha,size,fatten Arguments passed to geoms. For ribbon plots `alpha` +#' and `size` are passed to [ggplot2::geom_ribbon()]. For interval plots +#' `size` and `fatten` are passed to [ggplot2::geom_pointrange()]. +#' +#' @template return-ggplot-or-data +#' +#' @templateVar bdaRef (Ch. 6) +#' @template reference-bda +#' +#' @section Plot Descriptions: +#' \describe{ +#' \item{`ppc_intervals(), ppc_ribbon()`}{ +#' `100*prob`% central intervals for `yrep` at each `x` +#' value. `ppc_intervals()` plots intervals as vertical bars with points +#' indicating `yrep` medians and darker points indicating observed +#' `y` values. `ppc_ribbon()` plots a ribbon of connected intervals +#' with a line through the median of `yrep` and a darker line connecting +#' observed `y` values. In both cases an optional `x` variable can +#' also be specified for the x-axis variable. +#' +#' Depending on the number of observations and the variability in the +#' predictions at different values of `x`, one or the other of these +#' plots may be easier to read than the other. +#' } +#' \item{`ppc_intervals_grouped(), ppc_ribbon_grouped()`}{ +#' Same as `ppc_intervals()` and `ppc_ribbon()`, respectively, but a +#' separate plot (facet) is generated for each level of a grouping variable. +#' } +#' \item{`ppc_ecdf_intervals(), ppc_ecdf_intervals_difference()`}{ +#' `100*prob`% central simultaneous confidence intervals for the ECDF of the +#' probability integral transformed sample at `K` evenly spaced evaluation +#' points. If `y` and `yrep` are provided, the PIT values of `y` are computed +#' with regards to `yrep`. PIT values can also be provided directly as `pit`. +#' `ppc_ecdf_intervals()' plots the ECDF with the desired simultaneous +#' confidence intervals, whereas `ppc_ecdf_intervals_difference()` in +#' addition substracts the value of the theoretical CDF from the ECDF at each +#' evaluation point, resulting in a more dynamic range especially for large +#' samples. +#' } +#' +#' @examples +#' y <- rnorm(50) +#' yrep <- matrix(rnorm(5000, 0, 2), ncol = 50) +#' +#' color_scheme_set("brightblue") +#' ppc_ribbon(y, yrep) +#' ppc_intervals(y, yrep) +#' ppc_ecdf_intervals(y, yrep) +#' ppc_ecdf_intervals_difference(y, yrep) +#' +#' # change x axis to y values (instead of indices) and add x = y line +#' ppc_intervals(y, yrep, x = y) + abline_01() +#' +#' +#' color_scheme_set("teal") +#' year <- 1950:1999 +#' ppc_ribbon(y, yrep, x = year, alpha = 0, size = 0.75) + ggplot2::xlab("Year") +#' +#' color_scheme_set("pink") +#' year <- rep(2000:2009, each = 5) +#' group <- gl(5, 1, length = 50, labels = LETTERS[1:5]) +#' ppc_ribbon_grouped(y, yrep, x = year, group) + +#' ggplot2::scale_x_continuous(breaks = pretty) +#' +#' ppc_ribbon_grouped( +#' y, yrep, x = year, group, +#' facet_args = list(scales = "fixed"), +#' alpha = 1, +#' size = 2 +#' ) + +#' xaxis_text(FALSE) + +#' xaxis_ticks(FALSE) + +#' panel_bg(fill = "gray20") +#' +#' ppc_dat <- ppc_intervals_data(y, yrep, x = year, prob = 0.5) +#' ppc_group_dat <- ppc_intervals_data(y, yrep, x = year, group = group, prob = 0.5) +#' +#' \dontrun{ +#' library("rstanarm") +#' fit <- stan_glmer(mpg ~ wt + (1|cyl), data = mtcars, refresh = 0) +#' yrep <- posterior_predict(fit) +#' +#' color_scheme_set("purple") +#' with(mtcars, ppc_intervals(mpg, yrep, x = wt, prob = 0.5)) + +#' panel_bg(fill="gray90", color = NA) + +#' grid_lines(color = "white") +#' +#' ppc_intervals_grouped(y = mtcars$mpg, yrep, prob = 0.8, +#' x = mtcars$wt, group = mtcars$cyl) +#' +#' +#' color_scheme_set("gray") +#' ppc_intervals(mtcars$mpg, yrep, prob = 0.5) + +#' ggplot2::scale_x_continuous( +#' labels = rownames(mtcars), +#' breaks = 1:nrow(mtcars) +#' ) + +#' xaxis_text(angle = -70, vjust = 1, hjust = 0) +#' +#' } +#' +#' +NULL + +#' @rdname PPC-intervals +#' @export +ppc_intervals <- function(y, yrep, x = NULL, ..., prob = 0.5, prob_outer = 0.9, + size = 1, fatten = 3) { + check_ignored_arguments(...) + + data <- ppc_intervals_data( + y = y, + yrep = yrep, + x = x, + group = NULL, + prob = prob, + prob_outer = prob_outer + ) + + .ppc_intervals( + data = data, + size = size, + fatten = fatten, + grouped = FALSE, + style = "intervals", + x_lab = label_x(x) + ) +} + +#' @export +#' @rdname PPC-intervals +#' @param pit For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', the +#' PIT values of one or more samples can be provided directly causing'y' and +#' 'yrep' to be ignored. +#' @param K For 'ppc_ecdf_intervals' and 'ppc_ecdf_intervals_difference', +#' number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects +#' the granularity of the plot and can significantly speed up the computation +#' of the simultaneous confidence bands. Defaults to 'length(y)', +#' 'K = ncol(yrep)', or lastly to 'K = ncol(pit)' depending on which one is +#' provided. +ppc_ecdf_intervals <- function( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) { + check_ignored_arguments(...) + data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) - 1 + } + # Infer sample length + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) + } + # Infer number of chains + L <- max(data$rep_id) + any(data$is_y) + # Adjust confidence intervals for simultaneous coverage + if (missing(gamma_outer)) { + gamma_outer <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob_outer + ) + } + if (missing(gamma)) { + gamma <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob + ) + } + # Compute limits + limits <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma) + limits_outer <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma_outer) + # determine evaluation points. + z <- 0:K / K + # construct figure + fig <- ggplot(data) + + geom_ribbon( + data = data.frame(limits_outer), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N, + ymin = ~ lower / N, + color = "theoretical CDF", + fill = "theoretical CDF" + ), + alpha = alpha, + size = size) + + geom_ribbon( + data = data.frame(limits), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N, + ymin = ~ lower / N, + color = "theoretical CDF", + fill = "theoretical CDF" + ), + alpha = alpha, + size = size) + # add sample ECDF + if (any(data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(x = z, y = ~ value, color = "sample ECDF"), + size = size + ) + } + # add + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_(x = rep(z, each = L), group = ~ rep_id, + y = ~ value, color = ~ rep_label), + size = size + ) + } + fig + scale_y_continuous(breaks = c(0, 0.5, 1)) + + scale_color_manual( + name = "", + values = set_names( + get_color(c("lh", "dh")), + c("theoretical CDF", "sample ECDF")), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "sample ECDF" = expression(italic("sample ECDF")) + ) + ) + + scale_fill_manual( + name = "", + values = c("theoretical CDF" = get_color("l"), + "sample ECDF" = NA), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "sample ECDF" = expression(italic("sample ECDF")) + ) + ) + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() +} + +#' @export +#' @rdname PPC-intervals +ppc_ecdf_intervals_difference <- function( + y, + yrep, + pit, + gamma, + gamma_outer, + K, + ..., + prob = 0.5, + prob_outer = 0.9, + size = 1, + alpha = 0.33 +) { + check_ignored_arguments(...) + data <- .ppc_ecdf_intervals_data(y, yrep, pit, K) + if (missing(K)) { + K <- max(data$y_id) - 1 + } + if (!missing(y)) { + N <- length(y) + } else if (!missing(yrep)) { + N <- ncol(yrep) + } else { + N <- ncol(pit) + } + L <- max(data$rep_id) + any(data$is_y) + if (missing(gamma_outer)) { + gamma_outer <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob_outer + ) + } + if (missing(gamma)) { + gamma <- adjust_gamma( + N = N, + L = L, + K = K, + conf_level = prob + ) + } + limits <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma) + limits_outer <- ecdf_intervals( + N = N, + L = L, + K = K, + gamma = gamma_outer) + z <- seq(0,1, length.out = K + 1) + fig <- ggplot(data) + + geom_ribbon( + data = data.frame(limits_outer), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), + color = "theoretical CDF", + fill = "theoretical CDF" + ), + alpha = alpha, + size = size) + + geom_ribbon( + data = data.frame(limits), + aes_( + x = c(0, rep(z[2:(K + 1)], each = 2)), + ymax = ~ upper / N - c(rep(z[1:K], each = 2), 1), + ymin = ~ lower / N - c(rep(z[1:K], each = 2), 1), + color = "theoretical CDF", + fill = "theoretical CDF" + ), + alpha = alpha, + size = size) + if (any(data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, .data$is_y), + aes_(x = z, y = ~ value - z, color = "sample ECDF") + ) + } + if (any(!data$is_y)) { + fig <- fig + geom_step( + data = function(x) dplyr::filter(x, !.data$is_y), + aes_( + x = rep(z, each = L), + y = ~ value - rep(z, each = L), + group = ~ rep_id, + color = ~ rep_label + )) + } + fig + + scale_color_manual( + name = "", + values = set_names( + get_color(c("lh", "dh")), + c("theoretical CDF", "sample ECDF")), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "sample ECDF" = expression(italic("sample ECDF")) + ) + ) + + scale_fill_manual( + name = "", + values = c("theoretical CDF" = get_color("l"), + "sample ECDF" = NA), + labels = c( + "theoretical CDF" = expression(italic("theoretical CDF")), + "sample ECDF" = expression(italic("sample ECDF")) + ) + ) + + yaxis_title(FALSE) + + xaxis_title(FALSE) + + yaxis_ticks(FALSE) + + bayesplot_theme_get() +} + +#' @rdname PPC-intervals +#' @export +#' @template args-group +#' @param facet_args An optional list of arguments (other than `facets`) +#' passed to [ggplot2::facet_wrap()] to control faceting. +#' +ppc_intervals_grouped <- function(y, + yrep, + x = NULL, + group, + ..., + facet_args = list(), + prob = 0.5, + prob_outer = 0.9, + size = 1, + fatten = 3) { + check_ignored_arguments(...) + + data <- ppc_intervals_data( + y = y, + yrep = yrep, + x = x, + group = group, + prob = prob, + prob_outer = prob_outer + ) + + facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" + + .ppc_intervals( + data = data, + facet_args = facet_args, + size = size, + fatten = fatten, + grouped = TRUE, + style = "intervals", + x_lab = label_x(x) + ) +} + + +#' @rdname PPC-intervals +#' @export +#' @param y_draw For ribbon plots only, a string specifying how to draw `y`. Can +#' be `"line"` (the default), `"points"`, or `"both"`. +ppc_ribbon <- function(y, + yrep, + x = NULL, + ..., + prob = 0.5, + prob_outer = 0.9, + alpha = 0.33, + size = 0.25, + y_draw = c("line", "points", "both")) { + check_ignored_arguments(...) + + data <- ppc_intervals_data( + y = y, + yrep = yrep, + x = x, + group = NULL, + prob = prob + ) + + .ppc_intervals( + data = data, + alpha = alpha, + size = size, + grouped = FALSE, + style = "ribbon", + x_lab = label_x(x), + y_draw = y_draw + ) +} + + +#' @export +#' @rdname PPC-intervals +ppc_ribbon_grouped <- function(y, + yrep, + x = NULL, + group, + ..., + facet_args = list(), + prob = 0.5, + prob_outer = 0.9, + alpha = 0.33, + size = 0.25, + y_draw = c("line", "points", "both")) { + check_ignored_arguments(...) + + data <- ppc_intervals_data( + y = y, + yrep = yrep, + x = x, + group = group, + prob = prob, + prob_outer = prob_outer + ) + + facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" + + .ppc_intervals( + data = data, + facet_args = facet_args, + alpha = alpha, + size = size, + grouped = TRUE, + style = "ribbon", + x_lab = label_x(x), + y_draw = y_draw + ) +} + + +#' @rdname PPC-intervals +#' @export +ppc_intervals_data <- function(y, yrep, x = NULL, group = NULL, + ..., prob = 0.5, prob_outer = 0.9) { + check_ignored_arguments(...) + .ppc_intervals_data(y = y, yrep = yrep, x = x, group = group, + prob = prob, prob_outer = prob_outer) +} + + +#' @rdname PPC-intervals +#' @export +ppc_ribbon_data <- ppc_intervals_data + + + + +# internal ---------------------------------------------------------------- +label_x <- function(x) { + if (missing(x)) "Index" else NULL +} + +.ppc_intervals_data <- function(y, yrep, x = NULL, group = NULL, + prob = 0.5, prob_outer = 0.9) { + grouped <- !is.null(group) + stopifnot(prob > 0 && prob < 1) + stopifnot(prob_outer > 0 && prob_outer <= 1) + probs <- sort(c(prob, prob_outer)) + prob <- probs[1] + prob_outer <- probs[2] + + y <- validate_y(y) + yrep <- validate_yrep(yrep, y) + x <- validate_x(x, y) + + long_d <- melt_and_stack(y, yrep) + long_d$x <- x[long_d$y_id] + long_d$y_obs <- y[long_d$y_id] + + molten_reps <- long_d[!as.logical(long_d[["is_y"]]), , drop = FALSE] + molten_reps$is_y <- NULL + + if (grouped) { + group <- validate_group(group, y) + molten_reps$group <- group[molten_reps$y_id] + group_vars <- syms(c("y_id", "y_obs", "group", "x")) + } else { + group_vars <- syms(c("y_id", "y_obs", "x")) + } + + grouped_d <- dplyr::group_by(molten_reps, !!! group_vars) + alpha <- (1 - probs) / 2 + probs <- sort(c(alpha, 0.5, 1 - alpha)) + + val_col <- sym("value") + dplyr::ungroup(dplyr::summarise( + grouped_d, + outer_width = prob_outer, + inner_width = prob, + ll = unname(quantile(!! val_col, probs = probs[1])), + l = unname(quantile(!! val_col, probs = probs[2])), + m = unname(quantile(!! val_col, probs = probs[3])), + h = unname(quantile(!! val_col, probs = probs[4])), + hh = unname(quantile(!! val_col, probs = probs[5])) + )) +} + +.ppc_ecdf_intervals_data <- function(y, yrep, pit, K) { + if (missing(K)){ + if (!missing(y)) { + K <- length(y) + } + else if (!missing(pit)) { + K <- ncol(pit) + } + else { + K <- ncol(yrep) + } + } + z <- seq(0,1, length.out = K + 1) + if (!missing(pit)) { + pit <- validate_pit(pit) + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) + } else if (missing(y)) { + pit <- u_scale(validate_yrep(yrep, yrep[1, ], match_ncols = FALSE)) + if (nrow(yrep) < 2) { + abort(paste("When both 'pit' and 'y' are missing, 'yrep' must include ", + "multiple rows to allow for sample comparison.", sep="")) + } + ecdfs <- t(apply(pit, 1, function(row) ecdf(row)(z))) + # work around to adhere to the melt_and_stack-format with only "yrep". + ecdfs <- melt_and_stack(ecdfs[1,], ecdfs) + ecdfs <- dplyr::filter(ecdfs, !ecdfs$is_y) + } else { + y <- validate_y(y) + yrep <- validate_yrep(yrep, y, match_ncols = FALSE) + pit <- empirical_pit(y, yrep) + ecdfs <- ecdf(pit)(z) + # work around to adhere to the melt_and_stack-format with only "y" . + ecdfs <- melt_and_stack(ecdfs, t(ecdfs)) + ecdfs <- dplyr::filter(ecdfs, ecdfs$is_y) + } + ecdfs$rep_id[is.na(ecdfs$rep_id)] <- 0 + ecdfs +} + + +# Make intervals or ribbon plot +# +# @param data The object returned by .ppc_intervals_data +# @return A ggplot object +# +.ppc_intervals <- + function(data, + facet_args = list(), + alpha = 0.33, + fatten = 3, + size = 1, + grouped = FALSE, + style = c("intervals", "ribbon"), + x_lab = NULL, + y_draw = c("line", "points", "both")) { + + style <- match.arg(style) + y_draw <- match.arg(y_draw) + + graph <- ggplot( + data = data, + mapping = aes_( + x = ~ x, + y = ~ m, + ymin = ~ l, + ymax = ~ h + ) + ) + + if (style == "ribbon") { + graph <- graph + + geom_ribbon( + aes_(color = "yrep", fill = "yrep", ymin = ~ ll, ymax = ~ hh), + alpha = alpha, + size = size + ) + + geom_ribbon( + aes_(color = "yrep", fill = "yrep"), + alpha = alpha, + size = size + ) + + geom_line( + aes_(color = "yrep"), + size = size/2 + ) + + geom_blank(aes_(fill = "y")) + + if (y_draw == "line" || y_draw == "both") { + graph <- graph + geom_line( + aes_(y = ~ y_obs, color = "y"), + size = 0.5 + ) + } + + if (y_draw == "points" || y_draw == "both") { + graph <- graph + geom_point( + mapping = aes_(y = ~ y_obs, color = "y", fill = "y"), + shape = 21, + size = 1.5 + ) + } + } else { + graph <- graph + + geom_pointrange( + mapping = aes_(color = "yrep", fill = "yrep", ymin = ~ ll, ymax = ~ hh), + shape = 21, + alpha = alpha, + size = size, + fatten = fatten + ) + + geom_pointrange( + mapping = aes_(color = "yrep", fill = "yrep"), + shape = 21, + size = size, + fatten = fatten + ) + + geom_point( + mapping = aes_(y = ~ y_obs, color = "y", fill = "y"), + shape = 21, + size = 1.5 + ) + } + + graph <- graph + + scale_color_manual( + name = "", + values = set_names(get_color(c("lh", "dh")), c("yrep", "y")), + labels = c(yrep = yrep_label(), y = y_label()) + ) + + scale_fill_manual( + name = "", + values = c(yrep = get_color("l"), + y = if (style == "ribbon") NA else get_color("d")), + labels = c(yrep = yrep_label(), y = y_label()) + ) + + + if (grouped) { + facet_args[["facets"]] <- "group" + facet_args[["scales"]] <- facet_args[["scales"]] %||% "free" + graph <- graph + do.call("facet_wrap", facet_args) + } + + graph + + labs(y = NULL, x = x_lab %||% expression(italic(x))) + + bayesplot_theme_get() +}