From dc42dbb08a5b361d13d1a8f6d63ca9eb4d902376 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 20 Mar 2019 09:56:10 -0500 Subject: [PATCH 01/29] depend on tibble. this is okay because dplyr::as_tibble() forced the dependency anyway. --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20fd218e..1bfa0a6a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,8 @@ Imports: stats, utils, rlang, - ggridges + ggridges, + tibble Suggests: arm, gridExtra (>= 2.2.1), From 001906e7aaa6608f97cb3547e736f9e1a3fcd2de Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 20 Mar 2019 09:56:26 -0500 Subject: [PATCH 02/29] add mcmc_trace_data() --- R/mcmc-traces.R | 197 ++++++++++++++++++++++++++++++------------------ 1 file changed, 124 insertions(+), 73 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 464a3822..9c591615 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -271,106 +271,96 @@ trace_style_np <- ...) { style <- match.arg(style) - x <- prepare_mcmc_array(x, pars, regex_pars, transformations) - - if (iter1 < 0) { - stop( - "'iter1' cannot be negative." - ) - } + data <- mcmc_trace_data( + x, pars = pars, regex_pars = regex_pars, transformations = transformations, + highlight = highlight, n_warmup = n_warmup, iter1 = iter1, window = window, + np = np, np_style = np_style + ) + n_iter <- unique(data$n_iterations) + n_chain <- unique(data$n_chains) + n_param <- unique(data$n_parameters) + + mapping <- aes_( + x = ~ iteration, + y = ~ value, + color = ~ chain + ) - if (n_warmup > 0 && iter1 > 0) { - stop( - "'n_warmup' and 'iter1' can't both be specified." + if (!is.null(highlight)) { + mapping <- modify_aes_( + mapping, + alpha = ~ highlight, + color = ~ highlight ) } - if (!is.null(highlight)) { - if (!has_multiple_chains(x)) - STOP_need_multiple_chains() - - if (!highlight %in% seq_len(ncol(x))) - stop( - "'highlight' is ", highlight, - ", but 'x' contains ", ncol(x), " chains." 
- ) + layer_warmup <- if (n_warmup > 0) { + layer_warmup <- annotate( + "rect", xmin = -Inf, xmax = n_warmup, ymin = -Inf, ymax = Inf, size = 1, + color = "gray88", fill = "gray88", alpha = 0.5) + } else { + NULL } - data <- melt_mcmc(x) - data$Chain <- factor(data$Chain) - n_chain <- num_chains(data) - n_iter <- num_iters(data) - n_param <- num_params(data) - geom_args <- list() geom_args$size <- size %||% ifelse(style == "line", 1/3, 1) + layer_draws <- do.call(paste0("geom_", style), geom_args) - if (is.null(highlight)) { - mapping <- aes_(x = ~ Iteration + iter1, y = ~ Value, color = ~ Chain) - } else { - stopifnot(length(highlight) == 1) - mapping <- aes_(x = ~ Iteration + iter1, - y = ~ Value, - alpha = ~ Chain == highlight, - color = ~ Chain == highlight) - } - graph <- ggplot(data, mapping) + - bayesplot_theme_get() - - if (n_warmup > 0) { - graph <- graph + - annotate("rect", - xmin = -Inf, xmax = n_warmup, - ymin = -Inf, ymax = Inf, - size = 1, - color = "gray88", - fill = "gray88", - alpha = 0.5) - } - - if (!is.null(window)) { + coord_window <- if (!is.null(window)) { stopifnot(length(window) == 2) - graph <- graph + coord_cartesian(xlim = window) + coord_cartesian(xlim = window) + } else { + NULL } - graph <- graph + do.call(paste0("geom_", style), geom_args) + scale_alpha <- NULL + scale_color <- NULL + div_rug <- NULL + div_guides <- NULL if (!is.null(highlight)) { - graph <- graph + - scale_alpha_discrete(range = c(alpha, 1), guide = "none") + - scale_color_manual("", - values = get_color(c("lh", "d")), - labels = c("Other chains", paste("Chain", highlight))) + ## scale_alpha_discrete() warns on default + scale_alpha <- scale_alpha_ordinal(range = c(alpha, 1), guide = "none") + scale_color <- scale_color_manual( + "", + values = get_color(c("lh", "d")), + labels = c("Other chains", paste("Chain", highlight))) + } else { - graph <- graph + - scale_color_manual("Chain", values = chain_colors(n_chain)) + scale_color <- scale_color_manual("Chain", 
values = chain_colors(n_chain)) if (!is.null(np)) { div_rug <- divergence_rug(np, np_style, n_iter, n_chain) if (!is.null(div_rug)) - graph <- graph + - div_rug + - guides( - color = guide_legend(order = 1), - linetype = guide_legend(order = 2, - title = NULL, - keywidth = rel(1/2), - override.aes = list(size = rel(1/2))) - ) + div_guides <- guides( + color = guide_legend(order = 1), + linetype = guide_legend( + order = 2, title = NULL, keywidth = rel(1/2), + override.aes = list(size = rel(1/2))) + ) } } + facet_call <- NULL if (n_param == 1) { - graph <- graph + ylab(levels(data$Parameter)) + facet_call <- ylab(levels(data$parameter)) } else { - facet_args$facets <- ~ Parameter - if (is.null(facet_args$scales)) - facet_args$scales <- "free" - graph <- graph + do.call("facet_wrap", facet_args) + facet_args$facets <- ~ parameter + facet_args$scales <- facet_args$scales %||% "free" + facet_call <- do.call("facet_wrap", facet_args) } - graph + + ggplot(data, mapping) + + bayesplot_theme_get() + + layer_warmup + + layer_draws + + coord_window + + scale_alpha + + scale_color + + div_rug + + div_guides + + facet_call + scale_x_continuous(breaks = pretty) + legend_move(ifelse(n_chain > 1, "right", "none")) + xaxis_title(FALSE) + @@ -394,6 +384,67 @@ chain_colors <- function(n) { } +mcmc_trace_data <- function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + facet_args = list(), + ..., + highlight = NULL, + n_warmup = 0, + iter1 = 0, + window = NULL, + size = NULL, + np = NULL, + np_style = trace_style_np()) { + + check_ignored_arguments(...) 
+ + x <- prepare_mcmc_array(x, pars, regex_pars, transformations) + + if (iter1 < 0) { + stop("'iter1' cannot be negative.") + } + + if (n_warmup > 0 && iter1 > 0) { + stop("'n_warmup' and 'iter1' can't both be specified.") + } + + if (!is.null(highlight)) { + stopifnot(length(highlight) == 1) + + if (!has_multiple_chains(x)){ + STOP_need_multiple_chains() + } + + if (!highlight %in% seq_len(ncol(x))) + stop( + "'highlight' is ", highlight, + ", but 'x' contains ", ncol(x), " chains." + ) + } + + ## @todo: filter to just window? + + data <- melt_mcmc(x) + data$Chain <- factor(data$Chain) + data$n_chains <- num_chains(data) + data$n_iterations <- num_iters(data) + data$n_parameters <- num_chains(data) + data <- rlang::set_names(data, tolower) + + data$highlight <- if (!is.null(highlight)) { + data$chain == highlight + } else { + FALSE + } + + data$warmup <- data$iteration <= n_warmup + data$iteration <- data$iteration + iter1 + + tibble::as_tibble(data) +} + # Add divergences to trace plot using geom_rug # # @param np User's 'np' argument, if specified. 
From 5a5eda85e2aeb37a98b88627ffe5cd1f4cf2a17e Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 20 Mar 2019 10:34:13 -0500 Subject: [PATCH 03/29] add ranks to mcmc_trace_data() --- R/mcmc-traces.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 9c591615..203a8638 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -433,6 +433,11 @@ mcmc_trace_data <- function(x, data$n_parameters <- num_chains(data) data <- rlang::set_names(data, tolower) + data <- data %>% + group_by(.data$parameter) %>% + mutate(value_rank = dplyr::row_number(.data$value)) %>% + ungroup() + data$highlight <- if (!is.null(highlight)) { data$chain == highlight } else { From fb4d0a481aa9d0e10e690aa97b418eeb74582efe Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 5 Apr 2019 14:19:00 -0500 Subject: [PATCH 04/29] fix indentation --- R/mcmc-traces.R | 126 ++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 203a8638..88bba524 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -157,42 +157,42 @@ mcmc_trace <- np_style = trace_style_np(), divergences = NULL) { - # deprecate 'divergences' arg in favor of 'np' (for consistency across functions) - if (!is.null(divergences)) { - warning( - "The 'divergences' argument is deprecated ", - "and will be removed in a future release. ", - "Use the 'np' argument instead." - ) + # deprecate 'divergences' arg in favor of 'np' (for consistency across functions) + if (!is.null(divergences)) { + warning( + "The 'divergences' argument is deprecated ", + "and will be removed in a future release. ", + "Use the 'np' argument instead." + ) - if (is.null(np)) { - np <- divergences - } else { - stop( - "'np' and 'divergences' can't both be specified. ", - "Use only 'np' (the 'divergences' argument is deprecated)." 
- ) - } + if (is.null(np)) { + np <- divergences + } else { + stop( + "'np' and 'divergences' can't both be specified. ", + "Use only 'np' (the 'divergences' argument is deprecated)." + ) } - - check_ignored_arguments(...) - .mcmc_trace( - x, - pars = pars, - regex_pars = regex_pars, - transformations = transformations, - facet_args = facet_args, - n_warmup = n_warmup, - window = window, - size = size, - style = "line", - np = np, - np_style = np_style, - iter1 = iter1, - ... - ) } + check_ignored_arguments(...) + .mcmc_trace( + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations, + facet_args = facet_args, + n_warmup = n_warmup, + window = window, + size = size, + style = "line", + np = np, + np_style = np_style, + iter1 = iter1, + ... + ) +} + #' @rdname MCMC-traces #' @export #' @param highlight For \code{mcmc_trace_highlight}, an integer specifying one @@ -209,22 +209,22 @@ mcmc_trace_highlight <- size = NULL, alpha = 0.2, highlight = 1) { - check_ignored_arguments(...) - .mcmc_trace( - x, - pars = pars, - regex_pars = regex_pars, - transformations = transformations, - facet_args = facet_args, - n_warmup = n_warmup, - window = window, - size = size, - alpha = alpha, - highlight = highlight, - style = "point", - ... - ) - } + check_ignored_arguments(...) + .mcmc_trace( + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations, + facet_args = facet_args, + n_warmup = n_warmup, + window = window, + size = size, + alpha = alpha, + highlight = highlight, + style = "point", + ... + ) +} #' @rdname MCMC-traces @@ -236,21 +236,21 @@ mcmc_trace_highlight <- #' for showing divergences in the plot. The default values are displayed in #' the \strong{Usage} section above. 
trace_style_np <- - function(div_color = "red", - div_size = 0.25, - div_alpha = 1) { - stopifnot( - is.character(div_color), - is.numeric(div_size), - is.numeric(div_alpha) && div_alpha >= 0 && div_alpha <= 1 - ) - style <- list( - color = c(div = div_color), - size = c(div = div_size), - alpha = c(div = div_alpha) - ) - structure(style, class = c(class(style), "nuts_style")) - } +function(div_color = "red", + div_size = 0.25, + div_alpha = 1) { + stopifnot( + is.character(div_color), + is.numeric(div_size), + is.numeric(div_alpha) && div_alpha >= 0 && div_alpha <= 1 + ) + style <- list( + color = c(div = div_color), + size = c(div = div_size), + alpha = c(div = div_alpha) + ) + structure(style, class = c(class(style), "nuts_style")) +} # internal ----------------------------------------------------------------- From fd24c53feff13e862f6167c47f8c9a5e5f712923 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 5 Apr 2019 14:55:38 -0500 Subject: [PATCH 05/29] add cividis --- R/bayesplot-colors.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/bayesplot-colors.R b/R/bayesplot-colors.R index bcbfc59a..dfb50577 100644 --- a/R/bayesplot-colors.R +++ b/R/bayesplot-colors.R @@ -368,7 +368,12 @@ master_color_list <- list( viridisB = list("#FCFFA4FF", "#FCA50AFF", "#DD513AFF", "#932667FF", "#420A68FF", "#000004FF"), viridisC = - list("#F0F921FF", "#FCA636FF", "#E16462FF", "#B12A90FF", "#6A00A8FF", "#0D0887FF") + list("#F0F921FF", "#FCA636FF", "#E16462FF", "#B12A90FF", "#6A00A8FF", "#0D0887FF"), + # popular form of viridis is viridis option D + viridisD = + list("#FDE725FF", "#7AD151FF", "#22A884FF", "#2A788EFF", "#414487FF", "#440154FF"), + viridisE = + list("#FFEA46FF", "#CBBA69FF", "#958F78FF", "#666970FF", "#31446BFF", "#00204DFF") ) # instantiate aesthetics -------------------------------------------------- From 7c69a849dd0b10564c0e3f6dab0895cd62a608b9 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 5 Apr 2019 15:08:22 -0500 Subject: [PATCH 
06/29] add mcmc_rank_overlay() --- NAMESPACE | 1 + R/mcmc-traces.R | 70 ++++++++++++++++++++++++++++++++++++++++++++-- man/MCMC-traces.Rd | 18 ++++++++++++ 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0436c4cb..7ae11dd9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -70,6 +70,7 @@ export(mcmc_nuts_treedepth) export(mcmc_pairs) export(mcmc_parcoord) export(mcmc_parcoord_data) +export(mcmc_rank_overlay) export(mcmc_recover_hist) export(mcmc_recover_intervals) export(mcmc_recover_scatter) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 88bba524..234dcae1 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -53,6 +53,12 @@ #' Traces are plotted using points rather than lines and the opacity of all #' chains but one (specified by the \code{highlight} argument) is reduced. #' } +#' \item{\code{mcmc_rank_overlay}}{ +#' Whereas traditional trace plots visualize how the chains mix over the course +#' of sampling, rank-normalized histograms visualize how the values from the +#' chains mix together in terms of ranking. An ideal plot would show the +#' lines mixing or overlapping in a uniform distribution. +#' } #' } #' #' @examples @@ -87,6 +93,11 @@ #' panel_bg(fill = "gray90", color = NA) + #' legend_move("top") #' +#' # Rank-normalized histogram plots. Instead of showing how chains mix over +#' # time, look at how the ranking of MCMC samples mixed between chains. +#' color_scheme_set("viridisE") +#' mcmc_rank_overlay(x, "alpha") +#' #' \dontrun{ #' # parse facet label text #' color_scheme_set("purple") @@ -236,9 +247,9 @@ mcmc_trace_highlight <- #' for showing divergences in the plot. The default values are displayed in #' the \strong{Usage} section above. 
trace_style_np <- -function(div_color = "red", - div_size = 0.25, - div_alpha = 1) { + function(div_color = "red", + div_size = 0.25, + div_alpha = 1) { stopifnot( is.character(div_color), is.numeric(div_size), @@ -253,6 +264,59 @@ function(div_color = "red", } +#' @rdname MCMC-traces +#' @export +#' @param n_bins number of bins to use for the histogram of rank-normalized MCMC +#' samples. +mcmc_rank_overlay <- function( + x, + pars = character(), + regex_pars = character(), + transformations = list(), + n_bins = 20) { + + data <- mcmc_trace_data( + x, pars = pars, regex_pars = regex_pars, transformations = transformations + ) + + n_chain <- unique(data$n_chains) + + # We have to bin and count the data ourselves because + # ggplot2::stat_bin(geom = "step") does not draw the final bin. + histobins <- data %>% + dplyr::distinct(.data$value_rank) %>% + mutate(cut = cut(.data$value_rank, n_bins)) %>% + group_by(.data$cut) %>% + mutate(bin_start = min(.data$value_rank)) %>% + ungroup() %>% + select(-.data$cut) + + d_bin_counts <- data %>% + left_join(histobins, by = "value_rank") %>% + count(.data$parameter, .data$chain, .data$bin_start) + + # Duplicate the final bin, setting the left edge to the greatest x value, so + # that the entire x-axis is used, + right_edge <- max(data$value_rank) + + d_bin_counts <- d_bin_counts %>% + dplyr::filter(.data$bin_start == max(.data$bin_start)) %>% + mutate(bin_start = right_edge) %>% + dplyr::bind_rows(d_bin_counts) + + scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain)) + + ggplot(d_bin_counts) + + aes_(x = ~ bin_start, y = ~ n, color = ~ chain) + + geom_step() + + facet_wrap("parameter") + + scale_color + + ylim(c(0, NA)) + + bayesplot_theme_get() + + labs(x = "Rank", y = NULL) +} + + # internal ----------------------------------------------------------------- .mcmc_trace <- function(x, pars = character(), diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 6b2b0d83..5c97cb25 100644 --- 
a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -5,6 +5,7 @@ \alias{mcmc_trace} \alias{mcmc_trace_highlight} \alias{trace_style_np} +\alias{mcmc_rank_overlay} \title{Trace plot (time series plot) of MCMC draws} \usage{ mcmc_trace(x, pars = character(), regex_pars = character(), @@ -17,6 +18,9 @@ mcmc_trace_highlight(x, pars = character(), regex_pars = character(), window = NULL, size = NULL, alpha = 0.2, highlight = 1) trace_style_np(div_color = "red", div_size = 0.25, div_alpha = 1) + +mcmc_rank_overlay(x, pars = character(), regex_pars = character(), + transformations = list(), n_bins = 20) } \arguments{ \item{x}{A 3-D array, matrix, list of matrices, or data frame of MCMC draws. @@ -104,6 +108,9 @@ of the chains that will be more visible than the others in the plot.} specified. They control the color, size, and transparency specifications for showing divergences in the plot. The default values are displayed in the \strong{Usage} section above.} + +\item{n_bins}{number of bins to use for the histogram of rank-normalized MCMC +samples.} } \value{ A ggplot object that can be further customized using the @@ -124,6 +131,12 @@ section, below, for details. Traces are plotted using points rather than lines and the opacity of all chains but one (specified by the \code{highlight} argument) is reduced. } + \item{\code{mcmc_rank_overlay}}{ + Whereas traditional trace plots visualize how the chains mix over the course + of sampling, rank-normalized histograms visualize how the values from the + chains mix together in terms of ranking. An ideal plot would show the + lines mixing or overlapping in a uniform distribution. + } } } @@ -159,6 +172,11 @@ mcmc_trace(x[,, 1:4], window = c(100, 130), size = 1) + panel_bg(fill = "gray90", color = NA) + legend_move("top") +# Rank-normalized histogram plots. Instead of showing how chains mix over +# time, look at how the ranking of MCMC samples mixed between chains. 
+color_scheme_set("viridisE") +mcmc_rank_overlay(x, "alpha") + \dontrun{ # parse facet label text color_scheme_set("purple") From 67608e7f2c01ef9c36183c35678ce9d7ef673bb3 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 5 Apr 2019 15:23:02 -0500 Subject: [PATCH 07/29] update news --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 0da051f3..9e8fcf13 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,12 @@ * The examples in [`?ppc_loo_pit_overlay()`](http://mc-stan.org/bayesplot/reference/PPC-loo.html) now work as expected. (#166, #167) + +* Added `"viridisD"` as an alternative name for `"viridis"` to the supported + colors. + +* Added `"viridisE"` (the [cividis](https://github.com/marcosci/cividis) + version of viridis) to the supported colors. From f3f1e876c2ec2c68184a14c1a74251b684d59214 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 10 Apr 2019 09:44:13 -0500 Subject: [PATCH 08/29] use tibble() instead of data_frame() to tamp down warnings --- R/helpers-ppc.R | 4 ++-- R/mcmc-diagnostics.R | 2 +- R/ppc-distributions.R | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/helpers-ppc.R b/R/helpers-ppc.R index bcfd4d53..e461409e 100644 --- a/R/helpers-ppc.R +++ b/R/helpers-ppc.R @@ -151,7 +151,7 @@ validate_x <- function(x = NULL, y, unique_x = FALSE) { melt_yrep <- function(yrep) { out <- yrep %>% reshape2::melt(varnames = c("rep_id", "y_id")) %>% - dplyr::as_data_frame() + tibble::as_tibble() id <- create_yrep_ids(out$rep_id) out$rep_label <- factor(id, levels = unique(id)) out[c("y_id", "rep_id", "rep_label", "value")] @@ -179,7 +179,7 @@ melt_and_stack <- function(y, yrep) { # Add a level in the labels for the observed y values levels(molten_yrep$rep_label) <- c(levels(molten_yrep$rep_label), y_text) - ydat <- dplyr::data_frame( + ydat <- tibble::tibble( rep_label = factor(y_text, levels = levels(molten_yrep$rep_label)), rep_id = NA_integer_, y_id = seq_along(y), diff --git a/R/mcmc-diagnostics.R 
b/R/mcmc-diagnostics.R index a0c6ac9e..53471237 100644 --- a/R/mcmc-diagnostics.R +++ b/R/mcmc-diagnostics.R @@ -382,7 +382,7 @@ diagnostic_data_frame <- function(x) { stopifnot(!anyDuplicated(names(x))) diagnostic <- class(x)[1] - d <- dplyr::data_frame( + d <- tibble::tibble( diagnostic = diagnostic, parameter = factor(seq_along(x), labels = names(x)), value = as.numeric(x), diff --git a/R/ppc-distributions.R b/R/ppc-distributions.R index bfb8fce8..45b974e6 100644 --- a/R/ppc-distributions.R +++ b/R/ppc-distributions.R @@ -107,7 +107,7 @@ ppc_data <- function(y, yrep, group = NULL) { if (!is.null(group)) { group <- validate_group(group, y) - group_indices <- dplyr::data_frame(group, y_id = seq_along(group)) + group_indices <- tibble::tibble(group, y_id = seq_along(group)) data <- data %>% left_join(group_indices, by = "y_id") %>% select(.data$group, dplyr::everything()) @@ -120,12 +120,12 @@ ppc_data <- function(y, yrep, group = NULL) { #' @rdname PPC-distributions #' @export -ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, +ppc_hist <- function(y, yrep, ..., binwidth = NULL, breaks = NULL, freq = TRUE) { check_ignored_arguments(...) 
data <- ppc_data(y, yrep) aes_list <- set_hist_aes(freq, fill = ~ is_y_label, color = ~ is_y_label) - + ggplot(data) + aes_list + geom_histogram(size = 0.25, binwidth = binwidth, breaks = breaks) + From 00c346c9ac9ce5478364020295e89d7323f59e6c Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 10 Apr 2019 09:55:45 -0500 Subject: [PATCH 09/29] make visual unit tests pass --- R/mcmc-traces.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 234dcae1..0eb5c30a 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -494,7 +494,7 @@ mcmc_trace_data <- function(x, data$Chain <- factor(data$Chain) data$n_chains <- num_chains(data) data$n_iterations <- num_iters(data) - data$n_parameters <- num_chains(data) + data$n_parameters <- num_params(data) data <- rlang::set_names(data, tolower) data <- data %>% From b7a4cda41b1eafd204bb4b751c32c7ea4e56e146 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 10 Apr 2019 10:06:58 -0500 Subject: [PATCH 10/29] style --- R/mcmc-traces.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 0eb5c30a..27ea5016 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -361,7 +361,8 @@ mcmc_rank_overlay <- function( layer_warmup <- if (n_warmup > 0) { layer_warmup <- annotate( "rect", xmin = -Inf, xmax = n_warmup, ymin = -Inf, ymax = Inf, size = 1, - color = "gray88", fill = "gray88", alpha = 0.5) + color = "gray88", fill = "gray88", alpha = 0.5 + ) } else { NULL } @@ -389,23 +390,22 @@ mcmc_rank_overlay <- function( "", values = get_color(c("lh", "d")), labels = c("Other chains", paste("Chain", highlight))) - } else { scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain)) if (!is.null(np)) { div_rug <- divergence_rug(np, np_style, n_iter, n_chain) - if (!is.null(div_rug)) + if (!is.null(div_rug)) { div_guides <- guides( color = guide_legend(order = 1), linetype = guide_legend( order = 2, title = NULL, keywidth = 
rel(1/2), override.aes = list(size = rel(1/2))) ) + } } } - facet_call <- NULL if (n_param == 1) { facet_call <- ylab(levels(data$parameter)) From 291a7a1fbffd11f08af541d50e48caff5c4914a8 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Wed, 10 Apr 2019 10:50:32 -0500 Subject: [PATCH 11/29] use markdown in mcmc_trace() docs --- R/bayesplot-package.R | 2 +- R/mcmc-traces.R | 93 ++++++++++++++++++++++--------------------- man/MCMC-traces.Rd | 65 +++++++++++++++--------------- 3 files changed, 80 insertions(+), 80 deletions(-) diff --git a/R/bayesplot-package.R b/R/bayesplot-package.R index aa7dae7a..ae038c00 100644 --- a/R/bayesplot-package.R +++ b/R/bayesplot-package.R @@ -5,7 +5,7 @@ #' @aliases bayesplot #' #' @import ggplot2 stats rlang -#' @importFrom dplyr %>% +#' @importFrom dplyr %>% summarise group_by select #' #' @description #' \if{html}{ diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 27ea5016..34aa21a1 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -1,11 +1,11 @@ #' Trace plot (time series plot) of MCMC draws #' -#' Trace plot (or traceplot) of MCMC draws. See the \strong{Plot Descriptions} +#' Trace plot (or traceplot) of MCMC draws. See the **Plot Descriptions** #' section, below, for details. #' #' @name MCMC-traces #' @family MCMC -#' +#' @md #' @template args-mcmc-x #' @template args-pars #' @template args-regex_pars @@ -13,47 +13,47 @@ #' @template args-facet_args #' @param ... Currently ignored. #' @param size An optional value to override the default line size -#' (\code{mcmc_trace}) or the default point size -#' (\code{mcmc_trace_highlight}). -#' @param alpha For \code{mcmc_trace_highlight}, passed to -#' \code{\link[ggplot2]{geom_point}} to control the transparency of the points +#' (`mcmc_trace()`) or the default point size +#' (`mcmc_trace_highlight()`). +#' @param alpha For `mcmc_trace_highlight()`, passed to +#' [ggplot2::geom_point()] to control the transparency of the points #' for the chains not highlighted. 
#' @param n_warmup An integer; the number of warmup iterations included in -#' \code{x}. The default is \code{n_warmup = 0}, i.e. to assume no warmup -#' iterations are included. If \code{n_warmup > 0} then the background for -#' iterations \code{1:n_warmup} is shaded gray. +#' `x`. The default is `n_warmup = 0`, i.e. to assume no warmup +#' iterations are included. If `n_warmup > 0` then the background for +#' iterations `1:n_warmup` is shaded gray. #' @param iter1 An integer; the iteration number of the first included draw #' (default 0). This can be used to make it more obvious that the warmup #' iterations have been discarded from the traceplot. It cannot be specified -#' if \code{n_warmup} is also set to a positive value. +#' if `n_warmup` is also set to a positive value. #' @param window An integer vector of length two specifying the limits of a #' range of iterations to display. -#' @param np For models fit using \code{\link{NUTS}} (more generally, any -#' \href{http://en.wikipedia.org/wiki/Symplectic_integrator}{symplectic -#' integrator}), an optional data frame providing NUTS diagnostic -#' information. The data frame should be the object returned by -#' \code{\link{nuts_params}} or one with the same structure. If \code{np} is -#' specified then tick marks are added to the bottom of the trace plot -#' indicating within which iterations there was a divergence (if there were any). -#' See the end of the \strong{Examples} section, below. -#' @param np_style A call to the \code{trace_style_np} helper function to +#' @param np For models fit using [`NUTS`] (more generally, any [symplectic +#' integrator](http://en.wikipedia.org/wiki/Symplectic_integrator)), an +#' optional data frame providing NUTS diagnostic information. The data frame +#' should be the object returned by [`nuts_params`] or one with the +#' same structure. 
If `np` is specified then tick marks are added to the +#' bottom of the trace plot indicating within which iterations there was a +#' divergence (if there were any). See the end of the **Examples** +#' section, below. +#' @param np_style A call to the `trace_style_np()` helper function to #' specify arguments controlling the appearance of tick marks representing -#' divergences (if the \code{np} argument is specified). -#' @param divergences Deprecated. Use the \code{np} argument instead. +#' divergences (if the `np` argument is specified). +#' @param divergences Deprecated. Use the `np` argument instead. #' #' @template return-ggplot #' #' @section Plot Descriptions: #' \describe{ -#' \item{\code{mcmc_trace}}{ -#' Standard trace plots of MCMC draws. For models fit using \code{\link{NUTS}}, -#' the \code{np} argument can be used to also show divergences on the trace plot. +#' \item{`mcmc_trace()`}{ +#' Standard trace plots of MCMC draws. For models fit using [`NUTS`], +#' the `np` argument can be used to also show divergences on the trace plot. #' } -#' \item{\code{mcmc_trace_highlight}}{ +#' \item{`mcmc_trace_highlight()`}{ #' Traces are plotted using points rather than lines and the opacity of all -#' chains but one (specified by the \code{highlight} argument) is reduced. +#' chains but one (specified by the `highlight` argument) is reduced. #' } -#' \item{\code{mcmc_rank_overlay}}{ +#' \item{`mcmc_rank_overlay()`}{ #' Whereas traditional trace plots visualize how the chains mix over the course #' of sampling, rank-normalized histograms visualize how the values from the #' chains mix together in terms of ranking. An ideal plot would show the @@ -205,9 +205,10 @@ mcmc_trace <- } #' @rdname MCMC-traces -#' @export -#' @param highlight For \code{mcmc_trace_highlight}, an integer specifying one +#' @param highlight For `mcmc_trace_highlight()`, an integer specifying one #' of the chains that will be more visible than the others in the plot. 
+#' @export +#' @md mcmc_trace_highlight <- function(x, pars = character(), @@ -239,13 +240,14 @@ mcmc_trace_highlight <- #' @rdname MCMC-traces -#' @export #' @param div_color,div_size,div_alpha Optional arguments to the -#' \code{trace_style_np} helper function that are eventually passed to -#' \code{\link[ggplot2]{geom_rug}} if the \code{np} argument is also -#' specified. They control the color, size, and transparency specifications +#' `trace_style_np()` helper function that are eventually passed to +#' [ggplot2::geom_rug()] if the `np` argument is also +#' specified. These control the color, size, and transparency specifications #' for showing divergences in the plot. The default values are displayed in -#' the \strong{Usage} section above. +#' the **Usage** section above. +#' @export +#' @md trace_style_np <- function(div_color = "red", div_size = 0.25, @@ -265,9 +267,9 @@ trace_style_np <- #' @rdname MCMC-traces -#' @export #' @param n_bins number of bins to use for the histogram of rank-normalized MCMC #' samples. +#' @export mcmc_rank_overlay <- function( x, pars = character(), @@ -514,17 +516,16 @@ mcmc_trace_data <- function(x, tibble::as_tibble(data) } -# Add divergences to trace plot using geom_rug -# -# @param np User's 'np' argument, if specified. -# @param np_style User's 'np_style' argument, if specified. -# @param n_iter Number of iterations in the trace plot (to check against number -# of iterations provided in 'np'). -# @param n_chain Number of chains in the trace plot (to check against number -# of chains provided in 'np'). -# @return Object returned by geom_rug. -# -#' @importFrom dplyr summarise group_by select +#' Add divergences to trace plot using geom_rug +#' +#' @param np User's 'np' argument, if specified. +#' @param np_style User's 'np_style' argument, if specified. +#' @param n_iter Number of iterations in the trace plot (to check against number +#' of iterations provided in 'np'). 
+#' @param n_chain Number of chains in the trace plot (to check against number +#' of chains provided in 'np'). +#' @return Object returned by geom_rug. +#' @noRd divergence_rug <- function(np, np_style, n_iter, n_chain) { if (is.data.frame(np)) { np <- validate_nuts_data_frame(np) diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 5c97cb25..fca18913 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -77,35 +77,34 @@ if \code{n_warmup} is also set to a positive value.} range of iterations to display.} \item{size}{An optional value to override the default line size -(\code{mcmc_trace}) or the default point size -(\code{mcmc_trace_highlight}).} - -\item{np}{For models fit using \code{\link{NUTS}} (more generally, any -\href{http://en.wikipedia.org/wiki/Symplectic_integrator}{symplectic -integrator}), an optional data frame providing NUTS diagnostic -information. The data frame should be the object returned by -\code{\link{nuts_params}} or one with the same structure. If \code{np} is -specified then tick marks are added to the bottom of the trace plot -indicating within which iterations there was a divergence (if there were any). -See the end of the \strong{Examples} section, below.} - -\item{np_style}{A call to the \code{trace_style_np} helper function to +(\code{mcmc_trace()}) or the default point size +(\code{mcmc_trace_highlight()}).} + +\item{np}{For models fit using \code{\link{NUTS}} (more generally, any \href{http://en.wikipedia.org/wiki/Symplectic_integrator}{symplectic integrator}), an +optional data frame providing NUTS diagnostic information. The data frame +should be the object returned by \code{\link{nuts_params}} or one with the +same structure. If \code{np} is specified then tick marks are added to the +bottom of the trace plot indicating within which iterations there was a +divergence (if there were any). 
See the end of the \strong{Examples} +section, below.} + +\item{np_style}{A call to the \code{trace_style_np()} helper function to specify arguments controlling the appearance of tick marks representing divergences (if the \code{np} argument is specified).} \item{divergences}{Deprecated. Use the \code{np} argument instead.} -\item{alpha}{For \code{mcmc_trace_highlight}, passed to -\code{\link[ggplot2]{geom_point}} to control the transparency of the points +\item{alpha}{For \code{mcmc_trace_highlight()}, passed to +\code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} to control the transparency of the points for the chains not highlighted.} -\item{highlight}{For \code{mcmc_trace_highlight}, an integer specifying one +\item{highlight}{For \code{mcmc_trace_highlight()}, an integer specifying one of the chains that will be more visible than the others in the plot.} \item{div_color, div_size, div_alpha}{Optional arguments to the -\code{trace_style_np} helper function that are eventually passed to -\code{\link[ggplot2]{geom_rug}} if the \code{np} argument is also -specified. They control the color, size, and transparency specifications +\code{trace_style_np()} helper function that are eventually passed to +\code{\link[ggplot2:geom_rug]{ggplot2::geom_rug()}} if the \code{np} argument is also +specified. These control the color, size, and transparency specifications for showing divergences in the plot. The default values are displayed in the \strong{Usage} section above.} @@ -123,20 +122,20 @@ section, below, for details. \section{Plot Descriptions}{ \describe{ - \item{\code{mcmc_trace}}{ - Standard trace plots of MCMC draws. For models fit using \code{\link{NUTS}}, - the \code{np} argument can be used to also show divergences on the trace plot. - } - \item{\code{mcmc_trace_highlight}}{ - Traces are plotted using points rather than lines and the opacity of all - chains but one (specified by the \code{highlight} argument) is reduced. 
- } - \item{\code{mcmc_rank_overlay}}{ - Whereas tradition trace plots visualize how the chains mix over the course - of sampling, rank-normalized histograms visualize how the values from the - chains mix together in terms of ranking. An ideal plot would show the - lines mixing or overlapping in a uniform distribution. - } +\item{\code{mcmc_trace()}}{ +Standard trace plots of MCMC draws. For models fit using \code{\link{NUTS}}, +the \code{np} argument can be used to also show divergences on the trace plot. +} +\item{\code{mcmc_trace_highlight()}}{ +Traces are plotted using points rather than lines and the opacity of all +chains but one (specified by the \code{highlight} argument) is reduced. +} +\item{\code{mcmc_rank_overlay()}}{ +Whereas tradition trace plots visualize how the chains mix over the course +of sampling, rank-normalized histograms visualize how the values from the +chains mix together in terms of ranking. An ideal plot would show the +lines mixing or overlapping in a uniform distribution. 
+} } } From 0f357690f9f35690115b65a6b4ae773f00be0e2b Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 10 May 2019 08:50:20 -0500 Subject: [PATCH 12/29] fix description --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1bfa0a6a..e228e6c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,16 +22,16 @@ SystemRequirements: pandoc (>= 1.12.3), pandoc-citeproc Depends: R (>= 3.1.0) Imports: - dplyr (>= 0.7.1), + dplyr (>= 0.8.0), ggplot2 (>= 2.2.1), reshape2, stats, utils, - rlang, + rlang (>= 0.3.0), ggridges, + hexbin, tibble Suggests: - arm, gridExtra (>= 2.2.1), knitr (>= 1.16), loo (>= 2.0.0), From e2a888a434753d1a776e18f5fec2306aebeef410 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 10 May 2019 09:54:08 -0500 Subject: [PATCH 13/29] add mcmc_rank_hist() --- NAMESPACE | 1 + R/mcmc-traces.R | 97 +++++++++++++++++++++++++++++++++++++++------- man/MCMC-traces.Rd | 19 ++++++--- 3 files changed, 99 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7ae11dd9..f80b59e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -70,6 +70,7 @@ export(mcmc_nuts_treedepth) export(mcmc_pairs) export(mcmc_parcoord) export(mcmc_parcoord_data) +export(mcmc_rank_hist) export(mcmc_rank_overlay) export(mcmc_recover_hist) export(mcmc_recover_intervals) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 34aa21a1..6e9aeec8 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -53,11 +53,15 @@ #' Traces are plotted using points rather than lines and the opacity of all #' chains but one (specified by the `highlight` argument) is reduced. #' } +#' \item{`mcmc_rank_hist()`}{ +#' Whereas traditional trace plots visualize how the chains mix over the +#' course of sampling, rank-normalized histograms visualize how the values +#' from the chains mix together in terms of ranking. An ideal plot would +#' show the lines mixing or overlapping in a uniform distribution. 
+#' } #' \item{`mcmc_rank_overlay()`}{ -#' Whereas tradition trace plots visualize how the chains mix over the course -#' of sampling, rank-normalized histograms visualize how the values from the -#' chains mix together in terms of ranking. An ideal plot would show the -#' lines mixing or overlapping in a uniform distribution. +#' Ranks from `mcmc_rank_hist()` are plotted using overlaid lines in a +#' single panel. #' } #' } #' @@ -96,6 +100,7 @@ #' # Rank-normalized histogram plots. Instead of showing how chains mix over #' # time, look at how the ranking of MCMC samples mixed between chains. #' color_scheme_set("viridisE") +#' mcmc_rank_hist(x, "alpha") #' mcmc_rank_overlay(x, "alpha") #' #' \dontrun{ @@ -268,17 +273,18 @@ trace_style_np <- #' @rdname MCMC-traces #' @param n_bins number of bins to use for the histogram of rank-normalized MCMC -#' samples. +#' samples. Defaults to 20. #' @export -mcmc_rank_overlay <- function( - x, - pars = character(), - regex_pars = character(), - transformations = list(), - n_bins = 20) { - +mcmc_rank_overlay <- function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + n_bins = 20) { data <- mcmc_trace_data( - x, pars = pars, regex_pars = regex_pars, transformations = transformations + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations ) n_chain <- unique(data$n_chains) @@ -318,6 +324,71 @@ mcmc_rank_overlay <- function( labs(x = "Rank", y = NULL) } +#' @rdname MCMC-traces +#' @export +mcmc_rank_hist <- function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + facet_args = list(), + n_bins = 20) { + data <- mcmc_trace_data( + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations + ) + + n_iter <- unique(data$n_iterations) + n_chains <- unique(data$n_chains) + n_param <- unique(data$n_parameters) + + # Create a dataframe with chain x parameter x min(rank) x max(rank) to set + # x axis range in each facet + 
data_boundaries <- data %>% + dplyr::distinct(.data$chain, .data$parameter) + data_boundaries <- dplyr::bind_rows( + mutate(data_boundaries, value_rank = min(data$value_rank)), + mutate(data_boundaries, value_rank = max(data$value_rank)) + ) + right_edge <- max(data_boundaries$value_rank) + + # If there is one parameter, put the chains in one row. + # Otherwise, use a grid. + + facet_args[["scales"]] <- facet_args[["scales"]] %||% "fixed" + + if (n_param > 1) { + facet_f <- facet_grid + facet_args[["facets"]] <- parameter ~ chain + } else { + facet_f <- facet_wrap + facet_args[["facets"]] <- parameter ~ chain + facet_args[["nrow"]] <- 1 + labeller <- function(x) label_value(x, multi_line = FALSE) + facet_args[["labeller"]] <- labeller + } + + facet_call <- do.call(facet_f, facet_args) + + ggplot(data) + + aes(x = value_rank) + + geom_histogram( + color = get_color("mid_highlight"), + fill = get_color("mid"), + binwidth = right_edge / n_bins, + boundary = right_edge, + size = .25 + ) + + geom_blank(data = data_boundaries) + + facet_call + + force_axes_in_facets() + + dont_expand_y_axis(c(0.005, 0)) + + bayesplot_theme_get() + + yaxis_title(FALSE) + + labs(x = "Rank") +} + # internal ----------------------------------------------------------------- .mcmc_trace <- function(x, diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index fca18913..971b4a27 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -6,6 +6,7 @@ \alias{mcmc_trace_highlight} \alias{trace_style_np} \alias{mcmc_rank_overlay} +\alias{mcmc_rank_hist} \title{Trace plot (time series plot) of MCMC draws} \usage{ mcmc_trace(x, pars = character(), regex_pars = character(), @@ -21,6 +22,9 @@ trace_style_np(div_color = "red", div_size = 0.25, div_alpha = 1) mcmc_rank_overlay(x, pars = character(), regex_pars = character(), transformations = list(), n_bins = 20) + +mcmc_rank_hist(x, pars = character(), regex_pars = character(), + transformations = list(), facet_args = list(), n_bins = 20) } \arguments{ 
\item{x}{A 3-D array, matrix, list of matrices, or data frame of MCMC draws. @@ -109,7 +113,7 @@ for showing divergences in the plot. The default values are displayed in the \strong{Usage} section above.} \item{n_bins}{number of bins to use for the histogram of rank-normalized MCMC -samples.} +samples. Defaults to 20.} } \value{ A ggplot object that can be further customized using the @@ -130,11 +134,15 @@ the \code{np} argument can be used to also show divergences on the trace plot. Traces are plotted using points rather than lines and the opacity of all chains but one (specified by the \code{highlight} argument) is reduced. } +\item{\code{mcmc_rank_hist()}}{ +Whereas traditional trace plots visualize how the chains mix over the +course of sampling, rank-normalized histograms visualize how the values +from the chains mix together in terms of ranking. An ideal plot would +show the lines mixing or overlapping in a uniform distribution. +} \item{\code{mcmc_rank_overlay()}}{ -Whereas tradition trace plots visualize how the chains mix over the course -of sampling, rank-normalized histograms visualize how the values from the -chains mix together in terms of ranking. An ideal plot would show the -lines mixing or overlapping in a uniform distribution. +Ranks from \code{mcmc_rank_hist()} are plotted using overlaid lines in a +single panel. } } } @@ -174,6 +182,7 @@ mcmc_trace(x[,, 1:4], window = c(100, 130), size = 1) + # Rank-normalized histogram plots. Instead of showing how chains mix over # time, look at how the ranking of MCMC samples mixed between chains. 
color_scheme_set("viridisE") +mcmc_rank_hist(x, "alpha") mcmc_rank_overlay(x, "alpha") \dontrun{ From 3955ec5f3e29b42dd1e916176442cceed7324f60 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 10 May 2019 10:14:45 -0500 Subject: [PATCH 14/29] Use standard evaluation --- R/mcmc-traces.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 6e9aeec8..282055ea 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -357,22 +357,21 @@ mcmc_rank_hist <- function(x, # Otherwise, use a grid. facet_args[["scales"]] <- facet_args[["scales"]] %||% "fixed" + facet_args[["facets"]] <- facet_args[["facets"]] %||% (parameter ~ chain) if (n_param > 1) { facet_f <- facet_grid - facet_args[["facets"]] <- parameter ~ chain } else { facet_f <- facet_wrap - facet_args[["facets"]] <- parameter ~ chain - facet_args[["nrow"]] <- 1 + facet_args[["nrow"]] <- facet_args[["nrow"]] %||% 1 labeller <- function(x) label_value(x, multi_line = FALSE) - facet_args[["labeller"]] <- labeller + facet_args[["labeller"]] <- facet_args[["labeller"]] %||% labeller } facet_call <- do.call(facet_f, facet_args) ggplot(data) + - aes(x = value_rank) + + aes_(x = ~ value_rank) + geom_histogram( color = get_color("mid_highlight"), fill = get_color("mid"), From c7230429562a720960d41bf9a912c196f78eff2e Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 10 May 2019 10:24:13 -0500 Subject: [PATCH 15/29] repair merge --- DESCRIPTION | 2 +- R/mcmc-traces.R | 27 +++++++++++++-------------- man/MCMC-traces.Rd | 1 - 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c11e9da4..baccf040 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ Imports: utils, rlang (>= 0.3.0), ggridges, - tibble + tibble, hexbin Suggests: gridExtra (>= 2.2.1), diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 3a2bf62a..933e482c 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -173,20 +173,19 @@ mcmc_trace <- np_style = 
trace_style_np(), divergences = NULL) { - # deprecate 'divergences' arg in favor of 'np' (for consistency across functions) - if (!is.null(np) && !is.null(divergences)) { - stop( - "'np' and 'divergences' can't both be specified. ", - "Use only 'np' (the 'divergences' argument is deprecated)." - ) - } else if (!is.null(divergences)) { - warning( - "The 'divergences' argument is deprecated ", - "and will be removed in a future release. ", - "Use the 'np' argument instead." - ) - np <- divergences - } + # deprecate 'divergences' arg in favor of 'np' (for consistency across functions) + if (!is.null(np) && !is.null(divergences)) { + stop( + "'np' and 'divergences' can't both be specified. ", + "Use only 'np' (the 'divergences' argument is deprecated)." + ) + } else if (!is.null(divergences)) { + warning( + "The 'divergences' argument is deprecated ", + "and will be removed in a future release. ", + "Use the 'np' argument instead." + ) + np <- divergences } check_ignored_arguments(...) diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index ebaf68a8..8200e94e 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -95,7 +95,6 @@ divergence (if there were any). 
See the end of the \strong{Examples} section, below.} \item{np_style}{A call to the \code{trace_style_np()} helper function to - specify arguments controlling the appearance of tick marks representing divergences (if the \code{np} argument is specified).} From 8ffa499934a644620a1dd7a8820dcb86a70d24a8 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Fri, 10 May 2019 10:29:50 -0500 Subject: [PATCH 16/29] export mcmc_trace_data() --- NAMESPACE | 1 + R/mcmc-traces.R | 54 ++++++++++++++++++++++------------------------ man/MCMC-traces.Rd | 9 +++++++- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f80b59e5..747af0d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,6 +80,7 @@ export(mcmc_rhat_data) export(mcmc_rhat_hist) export(mcmc_scatter) export(mcmc_trace) +export(mcmc_trace_data) export(mcmc_trace_highlight) export(mcmc_violin) export(neff_ratio) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 933e482c..b2825d1f 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -41,7 +41,7 @@ #' divergences (if the `np` argument is specified). #' @param divergences Deprecated. Use the `np` argument instead. #' -#' @template return-ggplot +#' @template return-ggplot-or-data #' #' @section Plot Descriptions: #' \describe{ @@ -211,18 +211,17 @@ mcmc_trace <- #' of the chains that will be more visible than the others in the plot. #' @export #' @md -mcmc_trace_highlight <- - function(x, - pars = character(), - regex_pars = character(), - transformations = list(), - facet_args = list(), - ..., - n_warmup = 0, - window = NULL, - size = NULL, - alpha = 0.2, - highlight = 1) { +mcmc_trace_highlight <- function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + facet_args = list(), + ..., + n_warmup = 0, + window = NULL, + size = NULL, + alpha = 0.2, + highlight = 1) { check_ignored_arguments(...) .mcmc_trace( x, @@ -250,20 +249,19 @@ mcmc_trace_highlight <- #' the **Usage** section above. 
#' @export #' @md -trace_style_np <- - function(div_color = "red", - div_size = 0.25, - div_alpha = 1) { +trace_style_np <- function(div_color = "red", div_size = 0.25, div_alpha = 1) { stopifnot( is.character(div_color), is.numeric(div_size), is.numeric(div_alpha) && div_alpha >= 0 && div_alpha <= 1 ) + style <- list( color = c(div = div_color), size = c(div = div_size), alpha = c(div = div_alpha) ) + structure(style, class = c(class(style), "nuts_style")) } @@ -324,11 +322,11 @@ mcmc_rank_overlay <- function(x, #' @rdname MCMC-traces #' @export mcmc_rank_hist <- function(x, - pars = character(), - regex_pars = character(), - transformations = list(), - facet_args = list(), - n_bins = 20) { + pars = character(), + regex_pars = character(), + transformations = list(), + facet_args = list(), + n_bins = 20) { data <- mcmc_trace_data( x, pars = pars, @@ -344,18 +342,19 @@ mcmc_rank_hist <- function(x, # x axis range in each facet data_boundaries <- data %>% dplyr::distinct(.data$chain, .data$parameter) + data_boundaries <- dplyr::bind_rows( mutate(data_boundaries, value_rank = min(data$value_rank)), mutate(data_boundaries, value_rank = max(data$value_rank)) ) - right_edge <- max(data_boundaries$value_rank) - # If there is one parameter, put the chains in one row. - # Otherwise, use a grid. + right_edge <- max(data_boundaries$value_rank) facet_args[["scales"]] <- facet_args[["scales"]] %||% "fixed" facet_args[["facets"]] <- facet_args[["facets"]] %||% (parameter ~ chain) + # If there is one parameter, put the chains in one row. + # Otherwise, use a grid. if (n_param > 1) { facet_f <- facet_grid } else { @@ -402,7 +401,6 @@ mcmc_rank_hist <- function(x, np_style = trace_style_np(), iter1 = 0, ...) 
{ - style <- match.arg(style) data <- mcmc_trace_data( x, pars = pars, regex_pars = regex_pars, transformations = transformations, @@ -516,7 +514,8 @@ chain_colors <- function(n) { unname(rev(clrs)) } - +#' @rdname MCMC-traces +#' @export mcmc_trace_data <- function(x, pars = character(), regex_pars = character(), @@ -530,7 +529,6 @@ mcmc_trace_data <- function(x, size = NULL, np = NULL, np_style = trace_style_np()) { - check_ignored_arguments(...) x <- prepare_mcmc_array(x, pars, regex_pars, transformations) diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 8200e94e..a3934488 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -7,6 +7,7 @@ \alias{trace_style_np} \alias{mcmc_rank_overlay} \alias{mcmc_rank_hist} +\alias{mcmc_trace_data} \title{Trace plot (time series plot) of MCMC draws} \usage{ mcmc_trace(x, pars = character(), regex_pars = character(), @@ -25,6 +26,11 @@ mcmc_rank_overlay(x, pars = character(), regex_pars = character(), mcmc_rank_hist(x, pars = character(), regex_pars = character(), transformations = list(), facet_args = list(), n_bins = 20) + +mcmc_trace_data(x, pars = character(), regex_pars = character(), + transformations = list(), facet_args = list(), ..., + highlight = NULL, n_warmup = 0, iter1 = 0, window = NULL, + size = NULL, np = NULL, np_style = trace_style_np()) } \arguments{ \item{x}{A 3-D array, matrix, list of matrices, or data frame of MCMC draws. @@ -119,7 +125,8 @@ samples. Defaults to 20.} } \value{ A ggplot object that can be further customized using the - \pkg{ggplot2} package. + \pkg{ggplot2} package. The \code{_data} functions return the data that + would have been drawn by the plotting function. } \description{ Trace plot (or traceplot) of MCMC draws. 
See the \strong{Plot Descriptions} From b627dc2b4031f46ba248d584c03c3fa3aa99ffaf Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Tue, 21 May 2019 12:17:17 -0500 Subject: [PATCH 17/29] update news --- NEWS.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/NEWS.md b/NEWS.md index b0c9aeea..6e8a4927 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,20 @@ +* Two new plots have been added for inspecting the distribution of ranks. + Rank-normalized histograms were introduced by the Stan team's [new paper on + MCMC diagnostics](https://arxiv.org/abs/1903.08008). (#178, #179) + + `mcmc_rank_hist()`: A traditional traceplot (`mcmc_trace()`) visualizes how + the sampled values of the MCMC chains mix over the course of sampling. A + rank-normalized histogram (`mcmc_rank_hist()`) visualizes how the *ranks* of + values from the chains mix together. An ideal plot would show the ranks mixing + or overlapping in a uniform distribution. + + `mcmc_rank_overlay()`: Instead of drawing each chain's histogram in a separate + panel, this plot draws the top edge of the chains' histograms in a single + panel. + * [ColorBrewer](http://colorbrewer2.org) palettes are now available as color schemes via [`color_scheme_set()`](https://mc-stan.org/bayesplot/reference/bayesplot-colors.html). 
From aeb4ea3ecadb6fc5d1c503458573bf4e830efac7 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Tue, 21 May 2019 12:18:32 -0500 Subject: [PATCH 18/29] support reference line, add citation, don't show y axis in mcmc_rank_hist() --- R/helpers-gg.R | 24 +++++++++++--- R/mcmc-traces.R | 45 ++++++++++++++++++++++++--- man-roxygen/reference-improved-rhat.R | 4 +++ man/MCMC-traces.Rd | 15 +++++++-- 4 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 man-roxygen/reference-improved-rhat.R diff --git a/R/helpers-gg.R b/R/helpers-gg.R index 7254b880..f084b9b4 100644 --- a/R/helpers-gg.R +++ b/R/helpers-gg.R @@ -64,12 +64,26 @@ dont_expand_axes <- function() { } force_axes_in_facets <- function() { thm <- bayesplot_theme_get() - annotate("segment", - x = c(-Inf, -Inf), xend = c(Inf,-Inf), - y = c(-Inf,-Inf), yend = c(-Inf, Inf), - color = thm$axis.line$colour %||% "black", - size = thm$axis.line$size %||% 0.5) + annotate( + "segment", + x = c(-Inf, -Inf), xend = c(Inf,-Inf), + y = c(-Inf,-Inf), yend = c(-Inf, Inf), + color = thm$axis.line$colour %||% thm$line$colour %||% "black", + size = thm$axis.line$size %||% thm$line$size %||% 0.5 + ) } + +force_x_axis_in_facets <- function() { + thm <- bayesplot_theme_get() + annotate( + "segment", + x = -Inf, xend = Inf, + y = -Inf, yend = -Inf, + color = thm$axis.line$colour %||% thm$line$colour %||% "black", + size = thm$axis.line$size %||% thm$line$size %||% 0.5 + ) +} + no_legend_spacing <- function() { theme(legend.spacing.y = unit(0, "cm")) } diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index d311ca30..6d591b43 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -56,7 +56,7 @@ #' Whereas traditional trace plots visualize how the chains mix over the #' course of sampling, rank-normalized histograms visualize how the values #' from the chains mix together in terms of ranking. An ideal plot would -#' show the lines mixing or overlapping in a uniform distribution. 
+#' show the rankings mixing or overlapping in a uniform distribution. #' } #' \item{`mcmc_rank_overlay()`}{ #' Ranks from `mcmc_rank_hist()` are plotted using overlaid lines in a @@ -64,6 +64,7 @@ #' } #' } #' +#' @template reference-improved-rhat #' @examples #' # some parameter draws to use for demonstration #' x <- example_mcmc_draws(chains = 4, params = 6) @@ -267,12 +268,15 @@ trace_style_np <- function(div_color = "red", div_size = 0.25, div_alpha = 1) { #' @rdname MCMC-traces #' @param n_bins number of bins to use for the histogram of rank-normalized MCMC #' samples. Defaults to 20. +#' @param ref_line whether to draw a horizontal line at the average number of +#' ranks per bin. Defaults to `FALSE`. #' @export mcmc_rank_overlay <- function(x, pars = character(), regex_pars = character(), transformations = list(), - n_bins = 20) { + n_bins = 20, + ref_line = FALSE) { data <- mcmc_trace_data( x, pars = pars, @@ -307,13 +311,26 @@ mcmc_rank_overlay <- function(x, scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain)) + layer_ref_line <- if (ref_line) { + geom_hline( + yintercept = (right_edge / n_bins) / n_chains, + color = get_color("dark_highlight"), + size = 1, + linetype = "dashed" + ) + } else { + NULL + } + ggplot(d_bin_counts) + aes_(x = ~ bin_start, y = ~ n, color = ~ chain) + geom_step() + + layer_ref_line + facet_wrap("parameter") + scale_color + ylim(c(0, NA)) + bayesplot_theme_get() + + force_x_axis_in_facets() + labs(x = "Rank", y = NULL) } @@ -324,7 +341,8 @@ mcmc_rank_hist <- function(x, regex_pars = character(), transformations = list(), facet_args = list(), - n_bins = 20) { + n_bins = 20, + ref_line = FALSE) { data <- mcmc_trace_data( x, pars = pars, @@ -362,6 +380,17 @@ mcmc_rank_hist <- function(x, facet_args[["labeller"]] <- facet_args[["labeller"]] %||% labeller } + layer_ref_line <- if (ref_line) { + geom_hline( + yintercept = (right_edge / n_bins) / n_chains, + color = get_color("dark_highlight"), + size = .5, + 
linetype = "dashed" + ) + } else { + NULL + } + facet_call <- do.call(facet_f, facet_args) ggplot(data) + @@ -373,12 +402,18 @@ mcmc_rank_hist <- function(x, boundary = right_edge, size = .25 ) + + layer_ref_line + geom_blank(data = data_boundaries) + facet_call + - force_axes_in_facets() + + force_x_axis_in_facets() + dont_expand_y_axis(c(0.005, 0)) + bayesplot_theme_get() + - yaxis_title(FALSE) + + theme( + axis.line.y = element_blank(), + axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks = element_blank() + ) + labs(x = "Rank") } diff --git a/man-roxygen/reference-improved-rhat.R b/man-roxygen/reference-improved-rhat.R new file mode 100644 index 00000000..59da344c --- /dev/null +++ b/man-roxygen/reference-improved-rhat.R @@ -0,0 +1,4 @@ +#' @references Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., Bürkner, P. +#' (2019). Rank-normalization, folding, and localization: An improved *R*-hat +#' for assessing convergence of MCMC. [arXiv +#' preprint](https://arxiv.org/abs/1903.08008). diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 7bc7beb9..5863a744 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -22,10 +22,11 @@ mcmc_trace_highlight(x, pars = character(), regex_pars = character(), trace_style_np(div_color = "red", div_size = 0.25, div_alpha = 1) mcmc_rank_overlay(x, pars = character(), regex_pars = character(), - transformations = list(), n_bins = 20) + transformations = list(), n_bins = 20, ref_line = FALSE) mcmc_rank_hist(x, pars = character(), regex_pars = character(), - transformations = list(), facet_args = list(), n_bins = 20) + transformations = list(), facet_args = list(), n_bins = 20, + ref_line = FALSE) mcmc_trace_data(x, pars = character(), regex_pars = character(), transformations = list(), facet_args = list(), ..., @@ -120,6 +121,9 @@ the plot. 
The default values are displayed in the \strong{Usage} section above.} \item{n_bins}{number of bins to use for the histogram of rank-normalized MCMC samples. Defaults to 20.} + +\item{ref_line}{whether to draw a horizontal line at the average number of +ranks per bin. Defaults to \code{FALSE}.} } \value{ A ggplot object that can be further customized using the \strong{ggplot2} @@ -145,7 +149,7 @@ chains but one (specified by the \code{highlight} argument) is reduced. Whereas traditional trace plots visualize how the chains mix over the course of sampling, rank-normalized histograms visualize how the values from the chains mix together in terms of ranking. An ideal plot would -show the lines mixing or overlapping in a uniform distribution. +show the rankings mixing or overlapping in a uniform distribution. } \item{\code{mcmc_rank_overlay()}}{ Ranks from \code{mcmc_rank_hist()} are plotted using overlaid lines in a @@ -243,6 +247,11 @@ mcmc_trace( ) } +} +\references{ +Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., Bürkner, P. +(2019). Rank-normalization, folding, and localization: An improved \emph{R}-hat +for assessing convergence of MCMC. \href{https://arxiv.org/abs/1903.08008}{arXiv preprint}. 
} \seealso{ Other MCMC: \code{\link{MCMC-combos}}, From 4718aa84d16d026c50641023d3389dc629ac94bd Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Tue, 21 May 2019 12:24:51 -0500 Subject: [PATCH 19/29] visual tests --- .../mcmc-rank-histogram-default.svg | 355 +++++++++++++++++ .../mcmc-rank-histogram-one-parameter.svg | 209 ++++++++++ .../mcmc-rank-histogram-reference-line.svg | 363 ++++++++++++++++++ .../mcmc-rank-histogram-wide-bins.svg | 145 +++++++ .../mcmc-traces/mcmc-rank-overlay-default.svg | 109 ++++++ .../mcmc-rank-overlay-one-parameter.svg | 72 ++++ .../mcmc-rank-overlay-reference-line.svg | 111 ++++++ .../mcmc-rank-overlay-wide-bins.svg | 72 ++++ tests/testthat/test-mcmc-traces.R | 59 +++ 9 files changed, 1495 insertions(+) create mode 100644 tests/figs/mcmc-traces/mcmc-rank-histogram-default.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-histogram-one-parameter.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-histogram-reference-line.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-histogram-wide-bins.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-overlay-default.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-overlay-one-parameter.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-overlay-reference-line.svg create mode 100644 tests/figs/mcmc-traces/mcmc-rank-overlay-wide-bins.svg diff --git a/tests/figs/mcmc-traces/mcmc-rank-histogram-default.svg b/tests/figs/mcmc-traces/mcmc-rank-histogram-default.svg new file mode 100644 index 00000000..7a6aad97 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-histogram-default.svg @@ -0,0 +1,355 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + +2 + + + + + + + + + + +3 + + + + + + + + + + +4 + + + + + + + + + + +V1 + + + + + + + + + + +V2 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 +Rank +mcmc rank histogram (default) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-histogram-one-parameter.svg b/tests/figs/mcmc-traces/mcmc-rank-histogram-one-parameter.svg new file mode 100644 index 00000000..e1f41046 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-histogram-one-parameter.svg @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1, 1 + + + + + + + + + + +V1, 2 + + + + + + + + + + +V1, 3 + + + + + + + + + + +V1, 4 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 +Rank +mcmc rank histogram (one parameter) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-histogram-reference-line.svg b/tests/figs/mcmc-traces/mcmc-rank-histogram-reference-line.svg new file mode 100644 index 00000000..79366d1a --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-histogram-reference-line.svg @@ -0,0 +1,363 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + +2 + + + + + + + + + + +3 + + + + + + + + + + +4 + + + + + + + + + + +V1 + + + + + + + + + + +V2 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 +Rank +mcmc rank histogram (reference line) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-histogram-wide-bins.svg b/tests/figs/mcmc-traces/mcmc-rank-histogram-wide-bins.svg new file mode 100644 index 00000000..8884cd27 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-histogram-wide-bins.svg @@ -0,0 +1,145 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1, 1 + + + + + + + + + + +V1, 2 + + + + + + + + + + +V1, 3 + + + + + + + + + + +V1, 4 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 + +0 +500 +1000 +1500 +2000 +Rank +mcmc rank histogram (wide bins) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-overlay-default.svg b/tests/figs/mcmc-traces/mcmc-rank-overlay-default.svg new file mode 100644 index 00000000..cc020e24 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-overlay-default.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + + +V2 + + + + + + + + + + + +0 +500 +1000 +1500 +2000 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +10 +20 +30 + + + + +Rank +Chain + + + + +1 +2 +3 +4 +mcmc rank overlay (default) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-overlay-one-parameter.svg b/tests/figs/mcmc-traces/mcmc-rank-overlay-one-parameter.svg new file mode 100644 index 00000000..bfc7c8a5 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-overlay-one-parameter.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + + + +0 +500 +1000 +1500 +2000 + +0 +10 +20 +30 + + + + +Rank +Chain + + + + +1 +2 +3 +4 +mcmc rank overlay 
(one parameter) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-overlay-reference-line.svg b/tests/figs/mcmc-traces/mcmc-rank-overlay-reference-line.svg new file mode 100644 index 00000000..1bd95203 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-overlay-reference-line.svg @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + + +V2 + + + + + + + + + + + +0 +500 +1000 +1500 +2000 + + + + + + +0 +500 +1000 +1500 +2000 + +0 +10 +20 +30 + + + + +Rank +Chain + + + + +1 +2 +3 +4 +mcmc rank overlay (reference line) + diff --git a/tests/figs/mcmc-traces/mcmc-rank-overlay-wide-bins.svg b/tests/figs/mcmc-traces/mcmc-rank-overlay-wide-bins.svg new file mode 100644 index 00000000..43cf8a25 --- /dev/null +++ b/tests/figs/mcmc-traces/mcmc-rank-overlay-wide-bins.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +V1 + + + + + + + + + + + +0 +500 +1000 +1500 +2000 + +0 +50 +100 +150 + + + + +Rank +Chain + + + + +1 +2 +3 +4 +mcmc rank overlay (wide bins) + diff --git a/tests/testthat/test-mcmc-traces.R b/tests/testthat/test-mcmc-traces.R index 6a45fda9..474402fd 100644 --- a/tests/testthat/test-mcmc-traces.R +++ b/tests/testthat/test-mcmc-traces.R @@ -117,6 +117,65 @@ test_that("mcmc_trace renders correctly", { vdiffr::expect_doppelganger("mcmc trace (iter1 offset)", p_iter1) }) +test_that("mcmc_rank_overlay renders correctly", { + testthat::skip_on_cran() + + p_base <- mcmc_rank_overlay(vdiff_dframe_chains, pars = c("V1", "V2")) + p_base_ref <- mcmc_rank_overlay( + vdiff_dframe_chains, + pars = c("V1", "V2"), + ref_line = TRUE + ) + p_one_param <- mcmc_rank_overlay(vdiff_dframe_chains, pars = "V1") + p_one_param_wide_bins <- mcmc_rank_overlay( + vdiff_dframe_chains, + pars = "V1", + n_bins = 4 + ) + + vdiffr::expect_doppelganger("mcmc rank overlay (default)", p_base) + vdiffr::expect_doppelganger( + "mcmc rank overlay (reference line)", + p_base_ref + ) + 
vdiffr::expect_doppelganger("mcmc rank overlay (one parameter)", p_one_param) + vdiffr::expect_doppelganger( + "mcmc rank overlay (wide bins)", + p_one_param_wide_bins + ) +}) + +test_that("mcmc_rank_hist renders correctly", { + testthat::skip_on_cran() + + p_base <- mcmc_rank_hist(vdiff_dframe_chains, pars = c("V1", "V2")) + p_base_ref <- mcmc_rank_hist( + vdiff_dframe_chains, + pars = c("V1", "V2"), + ref_line = TRUE + ) + p_one_param <- mcmc_rank_hist(vdiff_dframe_chains, pars = "V1") + p_one_param_wide_bins <- mcmc_rank_hist( + vdiff_dframe_chains, + pars = "V1", + n_bins = 4 + ) + + vdiffr::expect_doppelganger("mcmc rank histogram (default)", p_base) + vdiffr::expect_doppelganger( + "mcmc rank histogram (reference line)", + p_base_ref + ) + vdiffr::expect_doppelganger( + "mcmc rank histogram (one parameter)", + p_one_param + ) + vdiffr::expect_doppelganger( + "mcmc rank histogram (wide bins)", + p_one_param_wide_bins + ) +}) + test_that("mcmc_trace_highlight renders correctly", { testthat::skip_on_cran() From e6e7bb9b9030bfdbbfed04292d83849d9bb6b323 Mon Sep 17 00:00:00 2001 From: TJ Mahr Date: Tue, 21 May 2019 12:31:04 -0500 Subject: [PATCH 20/29] switch to abort/warn --- R/mcmc-traces.R | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 6d591b43..8a235c70 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -176,16 +176,16 @@ mcmc_trace <- # deprecate 'divergences' arg in favor of 'np' # (for consistency across functions) if (!is.null(np) && !is.null(divergences)) { - stop( + abort(paste0( "'np' and 'divergences' can't both be specified. ", "Use only 'np' (the 'divergences' argument is deprecated)." - ) + )) } else if (!is.null(divergences)) { - warning( + warn(paste0( "The 'divergences' argument is deprecated ", "and will be removed in a future release. ", "Use the 'np' argument instead." 
- ) + )) np <- divergences } @@ -567,11 +567,11 @@ mcmc_trace_data <- function(x, x <- prepare_mcmc_array(x, pars, regex_pars, transformations) if (iter1 < 0) { - stop("'iter1' cannot be negative.") + abort("'iter1' cannot be negative.") } if (n_warmup > 0 && iter1 > 0) { - stop("'n_warmup' and 'iter1' can't both be specified.") + abort("'n_warmup' and 'iter1' can't both be specified.") } if (!is.null(highlight)) { @@ -581,11 +581,12 @@ mcmc_trace_data <- function(x, STOP_need_multiple_chains() } - if (!highlight %in% seq_len(ncol(x))) - stop( + if (!highlight %in% seq_len(ncol(x))) { + abort(paste0( "'highlight' is ", highlight, ", but 'x' contains ", ncol(x), " chains." - ) + )) + } } ## @todo: filter to just window? From 128fcadf10721bea9220c1cfb06c61b3bbc7dd9e Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 14:59:39 -0400 Subject: [PATCH 21/29] fix r cmd check and test failure --- R/mcmc-traces.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 96882b39..fee4a4aa 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -5,7 +5,6 @@ #' #' @name MCMC-traces #' @family MCMC -#' @md #' @template args-mcmc-x #' @template args-pars #' @template args-regex_pars @@ -57,6 +56,7 @@ #' course of sampling, rank-normalized histograms visualize how the values #' from the chains mix together in terms of ranking. An ideal plot would #' show the rankings mixing or overlapping in a uniform distribution. +#' See Vehtari et al. (2019) for details. 
#' } #' \item{`mcmc_rank_overlay()`}{ #' Ranks from `mcmc_rank_hist()` are plotted using overlaid lines in a @@ -143,7 +143,6 @@ #' np_style = trace_style_np(div_color = "black", div_size = 0.5) #' ) #' -#' color_scheme_set("viridis") #' mcmc_trace( #' posterior, #' pars = c("wt", "sigma"), @@ -266,17 +265,19 @@ trace_style_np <- function(div_color = "red", div_size = 0.25, div_alpha = 1) { } #' @rdname MCMC-traces -#' @param n_bins number of bins to use for the histogram of rank-normalized MCMC -#' samples. Defaults to 20. -#' @param ref_line whether to draw a horizontal line at the average number of -#' ranks per bin. Defaults to `FALSE`. +#' @param n_bins For the rank plots, the number of bins to use for the histogram +#' of rank-normalized MCMC samples. Defaults to `20`. +#' @param ref_line For the rank plots, whether to draw a horizontal line at the +#' average number of ranks per bin. Defaults to `FALSE`. #' @export mcmc_rank_overlay <- function(x, pars = character(), regex_pars = character(), transformations = list(), + ..., n_bins = 20, ref_line = FALSE) { + check_ignored_arguments(...) data <- mcmc_trace_data( x, pars = pars, @@ -284,7 +285,7 @@ mcmc_rank_overlay <- function(x, transformations = transformations ) - n_chain <- unique(data$n_chains) + n_chains <- unique(data$n_chains) # We have to bin and count the data ourselves because # ggplot2::stat_bin(geom = "step") does not draw the final bin. @@ -309,7 +310,7 @@ mcmc_rank_overlay <- function(x, mutate(bin_start = right_edge) %>% dplyr::bind_rows(d_bin_counts) - scale_color <- scale_color_manual("Chain", values = chain_colors(n_chain)) + scale_color <- scale_color_manual("Chain", values = chain_colors(n_chains)) layer_ref_line <- if (ref_line) { geom_hline( @@ -341,8 +342,10 @@ mcmc_rank_hist <- function(x, regex_pars = character(), transformations = list(), facet_args = list(), + ..., n_bins = 20, ref_line = FALSE) { + check_ignored_arguments(...) 
data <- mcmc_trace_data( x, pars = pars, From b67ec98aaadb2d39e676e535bb7a30871ac43501 Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 15:07:38 -0400 Subject: [PATCH 22/29] add example using ref_line --- R/mcmc-traces.R | 1 + man/MCMC-traces.Rd | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index fee4a4aa..56b06c6a 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -101,6 +101,7 @@ #' # time, look at how the ranking of MCMC samples mixed between chains. #' color_scheme_set("viridisE") #' mcmc_rank_hist(x, "alpha") +#' mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE) #' mcmc_rank_overlay(x, "alpha") #' #' \dontrun{ diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index ac3c2917..fe3ce903 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -22,10 +22,10 @@ mcmc_trace_highlight(x, pars = character(), regex_pars = character(), trace_style_np(div_color = "red", div_size = 0.25, div_alpha = 1) mcmc_rank_overlay(x, pars = character(), regex_pars = character(), - transformations = list(), n_bins = 20, ref_line = FALSE) + transformations = list(), ..., n_bins = 20, ref_line = FALSE) mcmc_rank_hist(x, pars = character(), regex_pars = character(), - transformations = list(), facet_args = list(), n_bins = 20, + transformations = list(), facet_args = list(), ..., n_bins = 20, ref_line = FALSE) mcmc_trace_data(x, pars = character(), regex_pars = character(), @@ -119,11 +119,11 @@ of the chains that will be more visible than the others in the plot.} the color, size, and transparency specifications for showing divergences in the plot. The default values are displayed in the \strong{Usage} section above.} -\item{n_bins}{number of bins to use for the histogram of rank-normalized MCMC -samples. Defaults to 20.} +\item{n_bins}{For the rank plots, the number of bins to use for the histogram +of rank-normalized MCMC samples. 
Defaults to \code{20}.} -\item{ref_line}{whether to draw a horizontal line at the average number of -ranks per bin. Defaults to \code{FALSE}.} +\item{ref_line}{For the rank plots, whether to draw a horizontal line at the +average number of ranks per bin. Defaults to \code{FALSE}.} } \value{ A ggplot object that can be further customized using the \strong{ggplot2} @@ -150,6 +150,7 @@ Whereas traditional trace plots visualize how the chains mix over the course of sampling, rank-normalized histograms visualize how the values from the chains mix together in terms of ranking. An ideal plot would show the rankings mixing or overlapping in a uniform distribution. +See Vehtari et al. (2019) for details. } \item{\code{mcmc_rank_overlay()}}{ Ranks from \code{mcmc_rank_hist()} are plotted using overlaid lines in a @@ -194,6 +195,7 @@ mcmc_trace(x[,, 1:4], window = c(100, 130), size = 1) + # time, look at how the ranking of MCMC samples mixed between chains. color_scheme_set("viridisE") mcmc_rank_hist(x, "alpha") +mcmc_rank_hist(x, pars = c("alpha", "sigma"), ref_line = TRUE) mcmc_rank_overlay(x, "alpha") \dontrun{ @@ -236,7 +238,6 @@ mcmc_trace( np_style = trace_style_np(div_color = "black", div_size = 0.5) ) -color_scheme_set("viridis") mcmc_trace( posterior, pars = c("wt", "sigma"), From 1abb3f39bf9b488a60ce06146eec84510350635c Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 15:13:52 -0400 Subject: [PATCH 23/29] Add new color scheme names to the list in the doc --- R/bayesplot-colors.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bayesplot-colors.R b/R/bayesplot-colors.R index df911931..b61eba23 100644 --- a/R/bayesplot-colors.R +++ b/R/bayesplot-colors.R @@ -45,7 +45,7 @@ #' * `"teal"` #' * `"yellow"` #' * [`"viridis"`](https://CRAN.R-project.org/package=viridis), `"viridisA"`, -#' `"viridisB"`, `"viridisC"` +#' `"viridisB"`, `"viridisC"`, `"viridisD"`, `"viridisE"` #' * `"mix-x-y"`, replacing `x` and `y` with any two of #' the scheme 
names listed above (e.g. "mix-teal-pink", "mix-blue-red", #' etc.). The order of `x` and `y` matters, i.e., the color schemes From 7ec919bfd9729df007afa66f90935282e0be2fba Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 15:21:25 -0400 Subject: [PATCH 24/29] mention mcmc_trace_data() in NEWS --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index afa96120..5d3860ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,6 +19,9 @@ `mcmc_rank_overlay()`: Instead of drawing each chain's histogram in a separate panel, this plot draws the top edge of the chains' histograms in a single panel. + +* Added `mcmc_trace_data()`, which returns the data used for plotting the trace + plots and rank histograms. (Advances #97) * [ColorBrewer](http://colorbrewer2.org) palettes are now available as color schemes via From 4affa9e3c726cb70e06d3b5b2ebe5ed10d21ef35 Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 15:42:10 -0400 Subject: [PATCH 25/29] move mcmc_trace_data() out of the # internal section --- R/mcmc-traces.R | 135 ++++++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index 56b06c6a..a0526cf5 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -422,6 +422,74 @@ mcmc_rank_hist <- function(x, } +#' @rdname MCMC-traces +#' @export +mcmc_trace_data <- function(x, + pars = character(), + regex_pars = character(), + transformations = list(), + facet_args = list(), + ..., + highlight = NULL, + n_warmup = 0, + iter1 = 0, + window = NULL, + size = NULL, + np = NULL, + np_style = trace_style_np()) { + check_ignored_arguments(...) 
+ + x <- prepare_mcmc_array(x, pars, regex_pars, transformations) + + if (iter1 < 0) { + abort("'iter1' cannot be negative.") + } + + if (n_warmup > 0 && iter1 > 0) { + abort("'n_warmup' and 'iter1' can't both be specified.") + } + + if (!is.null(highlight)) { + stopifnot(length(highlight) == 1) + + if (!has_multiple_chains(x)){ + STOP_need_multiple_chains() + } + + if (!highlight %in% seq_len(ncol(x))) { + abort(paste0( + "'highlight' is ", highlight, + ", but 'x' contains ", ncol(x), " chains." + )) + } + } + + ## @todo: filter to just window? + data <- melt_mcmc(x) + data$Chain <- factor(data$Chain) + data$n_chains <- num_chains(data) + data$n_iterations <- num_iters(data) + data$n_parameters <- num_params(data) + data <- rlang::set_names(data, tolower) + + data <- data %>% + group_by(.data$parameter) %>% + mutate(value_rank = dplyr::row_number(.data$value)) %>% + ungroup() + + data$highlight <- if (!is.null(highlight)) { + data$chain == highlight + } else { + FALSE + } + + data$warmup <- data$iteration <= n_warmup + data$iteration <- data$iteration + iter1 + + tibble::as_tibble(data) +} + + # internal ----------------------------------------------------------------- .mcmc_trace <- function(x, pars = character(), @@ -551,73 +619,6 @@ chain_colors <- function(n) { unname(rev(clrs)) } -#' @rdname MCMC-traces -#' @export -mcmc_trace_data <- function(x, - pars = character(), - regex_pars = character(), - transformations = list(), - facet_args = list(), - ..., - highlight = NULL, - n_warmup = 0, - iter1 = 0, - window = NULL, - size = NULL, - np = NULL, - np_style = trace_style_np()) { - check_ignored_arguments(...) 
- - x <- prepare_mcmc_array(x, pars, regex_pars, transformations) - - if (iter1 < 0) { - abort("'iter1' cannot be negative.") - } - - if (n_warmup > 0 && iter1 > 0) { - abort("'n_warmup' and 'iter1' can't both be specified.") - } - - if (!is.null(highlight)) { - stopifnot(length(highlight) == 1) - - if (!has_multiple_chains(x)){ - STOP_need_multiple_chains() - } - - if (!highlight %in% seq_len(ncol(x))) { - abort(paste0( - "'highlight' is ", highlight, - ", but 'x' contains ", ncol(x), " chains." - )) - } - } - - ## @todo: filter to just window? - data <- melt_mcmc(x) - data$Chain <- factor(data$Chain) - data$n_chains <- num_chains(data) - data$n_iterations <- num_iters(data) - data$n_parameters <- num_params(data) - data <- rlang::set_names(data, tolower) - - data <- data %>% - group_by(.data$parameter) %>% - mutate(value_rank = dplyr::row_number(.data$value)) %>% - ungroup() - - data$highlight <- if (!is.null(highlight)) { - data$chain == highlight - } else { - FALSE - } - - data$warmup <- data$iteration <= n_warmup - data$iteration <- data$iteration + iter1 - - tibble::as_tibble(data) -} - #' Add divergences to trace plot using geom_rug #' From 0eac0c3cfe9f6bacc87efad8bc951576422f76be Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 16:05:26 -0400 Subject: [PATCH 26/29] remove unused args to mcmc_trace_data() --- R/mcmc-traces.R | 10 +++------- man/MCMC-traces.Rd | 5 ++--- man/bayesplot-colors.Rd | 2 +- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index a0526cf5..c8a8570a 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -1,4 +1,4 @@ -#' Trace plot (time series plot) of MCMC draws +#' Trace plots (time series plot) of MCMC draws #' #' Trace plot (or traceplot) of MCMC draws. See the **Plot Descriptions** #' section, below, for details. 
@@ -433,10 +433,7 @@ mcmc_trace_data <- function(x, highlight = NULL, n_warmup = 0, iter1 = 0, - window = NULL, - size = NULL, - np = NULL, - np_style = trace_style_np()) { + window = NULL) { check_ignored_arguments(...) x <- prepare_mcmc_array(x, pars, regex_pars, transformations) @@ -509,8 +506,7 @@ mcmc_trace_data <- function(x, style <- match.arg(style) data <- mcmc_trace_data( x, pars = pars, regex_pars = regex_pars, transformations = transformations, - highlight = highlight, n_warmup = n_warmup, iter1 = iter1, window = window, - np = np, np_style = np_style + highlight = highlight, n_warmup = n_warmup, iter1 = iter1, window = window ) n_iter <- unique(data$n_iterations) n_chain <- unique(data$n_chains) diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index fe3ce903..2b39c1ce 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -8,7 +8,7 @@ \alias{mcmc_rank_overlay} \alias{mcmc_rank_hist} \alias{mcmc_trace_data} -\title{Trace plot (time series plot) of MCMC draws} +\title{Trace plots (time series plot) of MCMC draws} \usage{ mcmc_trace(x, pars = character(), regex_pars = character(), transformations = list(), facet_args = list(), ..., n_warmup = 0, @@ -30,8 +30,7 @@ mcmc_rank_hist(x, pars = character(), regex_pars = character(), mcmc_trace_data(x, pars = character(), regex_pars = character(), transformations = list(), facet_args = list(), ..., - highlight = NULL, n_warmup = 0, iter1 = 0, window = NULL, - size = NULL, np = NULL, np_style = trace_style_np()) + highlight = NULL, n_warmup = 0, iter1 = 0, window = NULL) } \arguments{ \item{x}{A 3-D array, matrix, list of matrices, or data frame of MCMC draws. 
diff --git a/man/bayesplot-colors.Rd b/man/bayesplot-colors.Rd index e8cbc9b2..00bae882 100644 --- a/man/bayesplot-colors.Rd +++ b/man/bayesplot-colors.Rd @@ -67,7 +67,7 @@ schemes are: \item \code{"teal"} \item \code{"yellow"} \item \href{https://CRAN.R-project.org/package=viridis}{"viridis"}, \code{"viridisA"}, -\code{"viridisB"}, \code{"viridisC"} +\code{"viridisB"}, \code{"viridisC"}, \code{"viridisD"}, \code{"viridisE"} \item \code{"mix-x-y"}, replacing \code{x} and \code{y} with any two of the scheme names listed above (e.g. "mix-teal-pink", "mix-blue-red", etc.). The order of \code{x} and \code{y} matters, i.e., the color schemes From 0e4646476c45c9431ad0c0cb5e08e61d2717852a Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 16:42:13 -0400 Subject: [PATCH 27/29] drop more unused args from mcmc_trace_data() --- R/mcmc-traces.R | 14 ++++++++------ man/MCMC-traces.Rd | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index c8a8570a..d99b9f20 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -428,12 +428,10 @@ mcmc_trace_data <- function(x, pars = character(), regex_pars = character(), transformations = list(), - facet_args = list(), ..., highlight = NULL, n_warmup = 0, - iter1 = 0, - window = NULL) { + iter1 = 0) { check_ignored_arguments(...) x <- prepare_mcmc_array(x, pars, regex_pars, transformations) @@ -461,7 +459,6 @@ mcmc_trace_data <- function(x, } } - ## @todo: filter to just window? data <- melt_mcmc(x) data$Chain <- factor(data$Chain) data$n_chains <- num_chains(data) @@ -505,8 +502,13 @@ mcmc_trace_data <- function(x, ...) 
{ style <- match.arg(style) data <- mcmc_trace_data( - x, pars = pars, regex_pars = regex_pars, transformations = transformations, - highlight = highlight, n_warmup = n_warmup, iter1 = iter1, window = window + x, + pars = pars, + regex_pars = regex_pars, + transformations = transformations, + highlight = highlight, + n_warmup = n_warmup, + iter1 = iter1 ) n_iter <- unique(data$n_iterations) n_chain <- unique(data$n_chains) diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 2b39c1ce..18ee891b 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -29,8 +29,8 @@ mcmc_rank_hist(x, pars = character(), regex_pars = character(), ref_line = FALSE) mcmc_trace_data(x, pars = character(), regex_pars = character(), - transformations = list(), facet_args = list(), ..., - highlight = NULL, n_warmup = 0, iter1 = 0, window = NULL) + transformations = list(), ..., highlight = NULL, n_warmup = 0, + iter1 = 0) } \arguments{ \item{x}{A 3-D array, matrix, list of matrices, or data frame of MCMC draws. From 3104a3ec34bdab573883dffbcd41d57bcc9cda58 Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 17:15:32 -0400 Subject: [PATCH 28/29] make parameter,value,value_rank first columns of mcmc_trace_data() --- R/mcmc-traces.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/mcmc-traces.R b/R/mcmc-traces.R index d99b9f20..462cc5e2 100644 --- a/R/mcmc-traces.R +++ b/R/mcmc-traces.R @@ -1,4 +1,4 @@ -#' Trace plots (time series plot) of MCMC draws +#' Trace plots of MCMC draws #' #' Trace plot (or traceplot) of MCMC draws. See the **Plot Descriptions** #' section, below, for details. @@ -12,8 +12,7 @@ #' @template args-facet_args #' @param ... Currently ignored. #' @param size An optional value to override the default line size -#' (`mcmc_trace()`) or the default point size -#' (`mcmc_trace_highlight()`). +#' for `mcmc_trace()` or the default point size for `mcmc_trace_highlight()`. 
#' @param alpha For `mcmc_trace_highlight()`, passed to #' [ggplot2::geom_point()] to control the transparency of the points #' for the chains not highlighted. @@ -27,10 +26,10 @@ #' if `n_warmup` is also set to a positive value. #' @param window An integer vector of length two specifying the limits of a #' range of iterations to display. -#' @param np For models fit using [NUTS] (more generally, any [symplectic -#' integrator](https://en.wikipedia.org/wiki/Symplectic_integrator)), an -#' optional data frame providing NUTS diagnostic information. The data frame -#' should be the object returned by [nuts_params()] or one with the same +#' @param np For models fit using [NUTS] (more generally, any +#' [symplectic integrator](https://en.wikipedia.org/wiki/Symplectic_integrator)), +#' an optional data frame providing NUTS diagnostic information. The data +#' frame should be the object returned by [nuts_params()] or one with the same #' structure. If `np` is specified then tick marks are added to the bottom of #' the trace plot indicating within which iterations there was a divergence #' (if there were any). See the end of the **Examples** section, below. @@ -40,6 +39,8 @@ #' @param divergences Deprecated. Use the `np` argument instead. #' #' @template return-ggplot-or-data +#' @return `mcmc_trace_data()` returns the data for the trace *and* rank plots +#' in the same data frame. #' #' @section Plot Descriptions: #' \describe{ @@ -208,10 +209,9 @@ mcmc_trace <- } #' @rdname MCMC-traces +#' @export #' @param highlight For `mcmc_trace_highlight()`, an integer specifying one #' of the chains that will be more visible than the others in the plot. 
-#' @export -#' @md mcmc_trace_highlight <- function(x, pars = character(), regex_pars = character(), @@ -242,13 +242,13 @@ mcmc_trace_highlight <- function(x, #' @rdname MCMC-traces +#' @export #' @param div_color,div_size,div_alpha Optional arguments to the #' `trace_style_np()` helper function that are eventually passed to #' [ggplot2::geom_rug()] if the `np` argument is also specified. They control #' the color, size, and transparency specifications for showing divergences in #' the plot. The default values are displayed in the **Usage** section above. -#' @export -#' @md +#' trace_style_np <- function(div_color = "red", div_size = 0.25, div_alpha = 1) { stopifnot( is.character(div_color), @@ -466,10 +466,12 @@ mcmc_trace_data <- function(x, data$n_parameters <- num_params(data) data <- rlang::set_names(data, tolower) + first_cols <- syms(c("parameter", "value", "value_rank")) data <- data %>% group_by(.data$parameter) %>% mutate(value_rank = dplyr::row_number(.data$value)) %>% - ungroup() + ungroup() %>% + select(!!! 
first_cols, dplyr::everything()) data$highlight <- if (!is.null(highlight)) { data$chain == highlight @@ -478,8 +480,7 @@ mcmc_trace_data <- function(x, } data$warmup <- data$iteration <= n_warmup - data$iteration <- data$iteration + iter1 - + data$iteration <- data$iteration + as.integer(iter1) tibble::as_tibble(data) } From 691fd80a47f3fad4ba45d4d93af59172f88497b1 Mon Sep 17 00:00:00 2001 From: jgabry Date: Tue, 21 May 2019 17:15:38 -0400 Subject: [PATCH 29/29] regenerate some Rd files --- man-roxygen/return-ggplot-or-data.R | 7 ++++--- man/MCMC-diagnostics.Rd | 7 ++++--- man/MCMC-intervals.Rd | 7 ++++--- man/MCMC-parcoord.Rd | 7 ++++--- man/MCMC-traces.Rd | 22 +++++++++++++--------- man/PPC-distributions.Rd | 7 ++++--- man/PPC-intervals.Rd | 7 ++++--- 7 files changed, 37 insertions(+), 27 deletions(-) diff --git a/man-roxygen/return-ggplot-or-data.R b/man-roxygen/return-ggplot-or-data.R index 486ddd54..17ab8593 100644 --- a/man-roxygen/return-ggplot-or-data.R +++ b/man-roxygen/return-ggplot-or-data.R @@ -1,3 +1,4 @@ -#' @return A ggplot object that can be further customized using the **ggplot2** -#' package. The functions with suffix `_data` return the data that would have -#' been drawn by the plotting function. +#' @return The plotting functions return a ggplot object that can be further +#' customized using the **ggplot2** package. The functions with suffix +#' `_data()` return the data that would have been drawn by the plotting +#' function. diff --git a/man/MCMC-diagnostics.Rd b/man/MCMC-diagnostics.Rd index 4beb768a..ae8a6db9 100644 --- a/man/MCMC-diagnostics.Rd +++ b/man/MCMC-diagnostics.Rd @@ -69,9 +69,10 @@ to control faceting.} \item{lags}{The number of lags to show in the autocorrelation plot.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. 
+The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. } \description{ Plots of Rhat statistics, ratios of effective sample size to total sample diff --git a/man/MCMC-intervals.Rd b/man/MCMC-intervals.Rd index ab211a2c..e1870c4f 100644 --- a/man/MCMC-intervals.Rd +++ b/man/MCMC-intervals.Rd @@ -103,9 +103,10 @@ points across the curves are the same height. The method \code{"scaled height"} parameters. \code{n_dens} defaults to \code{1024}.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. } \description{ Plot central (quantile-based) posterior interval estimates from MCMC draws. diff --git a/man/MCMC-parcoord.Rd b/man/MCMC-parcoord.Rd index e29546fb..043836a6 100644 --- a/man/MCMC-parcoord.Rd +++ b/man/MCMC-parcoord.Rd @@ -75,9 +75,10 @@ the color, size, and transparency specifications for showing divergences in the plot. The default values are displayed in the \strong{Usage} section above.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. } \description{ Parallel coordinates plot of MCMC draws (one dimension per parameter). 
diff --git a/man/MCMC-traces.Rd b/man/MCMC-traces.Rd index 18ee891b..7437c734 100644 --- a/man/MCMC-traces.Rd +++ b/man/MCMC-traces.Rd @@ -8,7 +8,7 @@ \alias{mcmc_rank_overlay} \alias{mcmc_rank_hist} \alias{mcmc_trace_data} -\title{Trace plots (time series plot) of MCMC draws} +\title{Trace plots of MCMC draws} \usage{ mcmc_trace(x, pars = character(), regex_pars = character(), transformations = list(), facet_args = list(), ..., n_warmup = 0, @@ -89,12 +89,12 @@ if \code{n_warmup} is also set to a positive value.} range of iterations to display.} \item{size}{An optional value to override the default line size -(\code{mcmc_trace()}) or the default point size -(\code{mcmc_trace_highlight()}).} +for \code{mcmc_trace()} or the default point size for \code{mcmc_trace_highlight()}.} -\item{np}{For models fit using \link{NUTS} (more generally, any \href{https://en.wikipedia.org/wiki/Symplectic_integrator}{symplectic integrator}), an -optional data frame providing NUTS diagnostic information. The data frame -should be the object returned by \code{\link[=nuts_params]{nuts_params()}} or one with the same +\item{np}{For models fit using \link{NUTS} (more generally, any +\href{https://en.wikipedia.org/wiki/Symplectic_integrator}{symplectic integrator}), +an optional data frame providing NUTS diagnostic information. The data +frame should be the object returned by \code{\link[=nuts_params]{nuts_params()}} or one with the same structure. If \code{np} is specified then tick marks are added to the bottom of the trace plot indicating within which iterations there was a divergence (if there were any). See the end of the \strong{Examples} section, below.} @@ -125,9 +125,13 @@ of rank-normalized MCMC samples. Defaults to \code{20}.} average number of ranks per bin. Defaults to \code{FALSE}.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. 
The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. + +\code{mcmc_trace_data()} returns the data for the trace \emph{and} rank plots +in the same data frame. } \description{ Trace plot (or traceplot) of MCMC draws. See the \strong{Plot Descriptions} diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index b0d765a0..902d557b 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -99,9 +99,10 @@ to control the appearance of \code{y} points. The default of \code{y_jitter=NULL will let \strong{ggplot2} determine the amount of jitter.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. } \description{ Compare the empirical distribution of the data \code{y} to the distributions diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd index e7c44eef..5ab62ffb 100644 --- a/man/PPC-intervals.Rd +++ b/man/PPC-intervals.Rd @@ -61,9 +61,10 @@ and \code{size} are passed to \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_rib \code{size} and \code{fatten} are passed to \code{\link[ggplot2:geom_pointrange]{ggplot2::geom_pointrange()}}.} } \value{ -A ggplot object that can be further customized using the \strong{ggplot2} -package. The functions with suffix \code{_data} return the data that would have -been drawn by the plotting function. 
+The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\code{_data()} return the data that would have been drawn by the plotting +function. } \description{ Medians and central interval estimates of \code{yrep} with \code{y} overlaid.