From ca5aa86457934dff05f79387a38e2a2aee7ca3ee Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Tue, 30 Jan 2024 19:52:21 -0600 Subject: [PATCH 1/9] poach rollup_summary from rollup branch --- R/rollup_summary.R | 113 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 R/rollup_summary.R diff --git a/R/rollup_summary.R b/R/rollup_summary.R new file mode 100644 index 00000000..baec4b15 --- /dev/null +++ b/R/rollup_summary.R @@ -0,0 +1,113 @@ +#' "Roll up" `draws_summary` objects by collapsing over nonscalar parameters. +#' +#' By default, all variables with names matched by `\\[.*\\]$` are rolled up, +#' but there is an option to pass a list of parameter names, which will roll up +#' any variables matched by `^parameter_name\\[.*\\]$` +#' +#' @name draws_summary_rollup +#' @param x a `draws_summary` object or a `draws` object to be summarised +#' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up +#' @param min_only a character vector of varable names for which only minimum values are +#' desired in the rollup +#' @param max_only a character vector of varable names for which only maximum values are +#' desired in the rollup + +#' @return +#' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. +#' The first element is a standard `draws_summary` for the variables that are not rolled up +#' The second element is a rollup of the variables to be rolled up and contains max and min +#' values of the summary functions attained by any element of the variable +#' +#' @details +#' By default, only the maximum value of `rhat` and the minimum values of [ess_bulk()] and +#' [ess_tail()] are returned. # INSERT HOW WE HANDLE NA SUMMARIES +#' +#' @examples +#' ds <- summarise_draws(example_draws()) +#' ds2 <- summarise_draws(2 * example_draws()) +#' ds2$variable <- c("pi", "upsilon", +#' "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", +#' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") +#' draws_summary <- rbind(ds, ds2) +#' rollup_summary(draws_summary) +#' rollup_summary(draws_summary, rollup_vars = "theta") +#' rollup_summary(example_draws()) +NULL + +#' @rdname draws_summary_rollup +#' @export +rollup_summary <- function(x, rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { + UseMethod("rollup_summary") +} + +#' @rdname draws_summary_rollup +#' @export +rollup_summary.default <- function(x, rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { + rollup_summary(summarise_draws(x), rollup_vars = rollup_vars, + min_only = min_only, + max_only = max_only) +} + +#' @rdname draws_summary_rollup +#' @export +rollup_summary.draws_summary <- function (x, rollup_vars = NULL, + min_only = c("ess_bulk", "ess_tail"), + max_only = "rhat") { + # get variable names + vars <- draws_summary$variable + # Determine which variable names need to be rolled up + if (is.null(rollup_vars)) { + vars_nonscalar <- grepl("\\[", vars) + } else { + vars_nonscalar <- as.logical(colSums(do.call(rbind, + lapply(paste0("^", rollup_vars, "\\["), + function(x){grepl(x, vars)})))) + } + # Separate out draws_summary into the scalar variables to leave alone and the nonscalar + # variables for rollup + ds_scalar <- draws_summary[!vars_nonscalar, ] + ds_nonscalar <- draws_summary[vars_nonscalar, ] + # Roll up the nonscalar variables + varnames_nonscalar <- gsub("\\[(.*)", "", ds_nonscalar$variable) + summary_names <- names(draws_summary)[-1] + names_minmax <- summary_names[!(summary_names %in% c(min_only, max_only))] + split_nonscalar <- split(ds_nonscalar, varnames_nonscalar)[unique(varnames_nonscalar)] + # [unique(varnames_nonscalar)] preserves the order of the names + min_max <- do.call(rbind, lapply(split_nonscalar, rollup_helper_minmax, + names = names_minmax)) + min_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_min, names = min_only)) + max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) + variable_column <- data.frame("variable" = unique(varnames_nonscalar)) + variable_indices <- parse_variable_indices(ds_nonscalar$variable) + dimension_column <- data.frame("dimension" = paste0("(", + sapply(variable_indices, function(x){paste(x$dimensions, collapse = ",")}), + ")")) + nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) + out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) + out +} + +rollup_helper_minmax <- function(x, names){ + x <- x[, names] + mm <- c(apply(x, 2, function(x) {c(min(x), max(x))})) + names(mm) <- paste0(rep(names(x), each = 2), c("_min", "_max")) + mm +} + +rollup_helper_min <- function(x, names){ + x <- x[, names] + min_only <- apply(x, 2, min) + names(min_only) <- paste0(names(x), "_min") + min_only +} + +rollup_helper_max <- function(x, names){ + x <- x[, names] + max_only <- apply(x, 2, max) + names(max_only) <- paste0(names(x), "_max") + max_only +} From 2b2e4d2f56f580348cb68283564e8489b473af02 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Wed, 31 Jan 2024 01:56:12 -0600 Subject: [PATCH 2/9] rollup_summary() that allows specifying functions on a per-summary measure basis --- NAMESPACE | 6 + R/as_draws_rvars.R | 3 +- R/rollup_summary.R | 292 ++++++++++++++++++++++++++++-------------- R/summarise_draws.R | 72 ++++++----- R/variable-indices.R | 17 +++ man/rollup_summary.Rd | 122 ++++++++++++++++++ 6 files changed, 382 insertions(+), 130 deletions(-) create mode 100644 man/rollup_summary.Rd diff --git a/NAMESPACE b/NAMESPACE index 62cf616b..2ebe5bbb 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -248,6 +248,7 @@ S3method(print,draws_list) S3method(print,draws_matrix) S3method(print,draws_rvars) S3method(print,draws_summary) +S3method(print,rollup_summary) S3method(print,rvar) S3method(prod,rvar) S3method(quantile,rvar) @@ -287,6 +288,9 @@ S3method(rhat_basic,default) S3method(rhat_basic,rvar) S3method(rhat_nested,default) S3method(rhat_nested,rvar) +S3method(rollup_summary,data.frame) +S3method(rollup_summary,default) +S3method(rollup_summary,rollup_summary) S3method(sd,default) S3method(sd,rvar) S3method(split_chains,draws) @@ -420,6 +424,7 @@ export(cdf) export(chain_ids) export(default_convergence_measures) export(default_mcse_measures) +export(default_rollups) export(default_summary_measures) export(diag) export(dissent) @@ -485,6 +490,7 @@ export(rfun) export(rhat) export(rhat_basic) export(rhat_nested) +export(rollup_summary) export(rstar) export(rvar) export(rvar_all) diff --git a/R/as_draws_rvars.R b/R/as_draws_rvars.R index fd7a558a..506a5581 100755 --- a/R/as_draws_rvars.R +++ b/R/as_draws_rvars.R @@ -84,8 +84,7 @@ as_draws_rvars.draws_matrix <- function(x, ...) { # first, pull out the list of indices into a data frame # where each column is an index variable - indices <- as.data.frame(do.call(rbind, split_indices(var$indices)), - stringsAsFactors = FALSE) + indices <- split_indices_to_df(var$indices) unique_indices <- vector("list", length(indices)) .dimnames <- vector("list", length(indices)) names(unique_indices) <- names(indices) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index baec4b15..d8f6de60 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -1,113 +1,217 @@ -#' "Roll up" `draws_summary` objects by collapsing over nonscalar parameters. -#' -#' By default, all variables with names matched by `\\[.*\\]$` are rolled up, -#' but there is an option to pass a list of parameter names, which will roll up -#' any variables matched by `^parameter_name\\[.*\\]$` -#' -#' @name draws_summary_rollup -#' @param x a `draws_summary` object or a `draws` object to be summarised -#' @param rollup_vars a list of variable names (excluding brackets and indices) to roll up -#' @param min_only a character vector of varable names for which only minimum values are -#' desired in the rollup -#' @param max_only a character vector of varable names for which only maximum values are -#' desired in the rollup - -#' @return -#' The `rollup_summary()` methods return a list of [tibble][tibble::tibble] data frames. -#' The first element is a standard `draws_summary` for the variables that are not rolled up -#' The second element is a rollup of the variables to be rolled up and contains max and min -#' values of the summary functions attained by any element of the variable -#' +#' "Roll up" `draws_summary` objects by collapsing over non-scalar parameters. +#' +#' "Rolls up" summaries of draws (e.g. as returned by [summarise_draws()]). +#' By default, all variables containing indices (e.g. `"x[1]"`) are rolled up, +#' but the `.variable` parameter can be used to roll up specific variables only. +#' +#' @param .x a `draws_summary` object, a [`draws`] object, a `data.frame`, or an +#' object with a [summarise_draws()] method. +#' @param .variable (character vector) base names (without indices) of variables +#' to roll up. If `NULL` (the default), all variables with indices in their names +#' will be rolled up. +#' @param ... a named arguments where each name is a summary measure (i.e. column) +#' in `.x` and the value is a character vector of function names or a named list +#' of functions giving the rollup functions to apply to the corresponding summary +#' measure. +#' @param .default (list) named list where names are summary measures in `.x` +#' and values are the default rollup functions to apply to those summary +#' measures unless overridden by `...`. +#' @param .unspecified (character vector or list) default rollup functions to +#' apply to any summary measure (column) in `.x` that does not have its own +#' specific rollup functions given in `...` or `.default`. #' @details -#' By default, only the maximum value of `rhat` and the minimum values of [ess_bulk()] and -#' [ess_tail()] are returned. # INSERT HOW WE HANDLE NA SUMMARIES -#' +#' If called without specifying additional rollup functions in `...`, +#' `rollup_summary()` will apply the functions provided in `.default` and +#' `.unspecified` to the columns in `.x` (or, if `.x` is not a `data.frame`, +#' to the result of `summarise_draws(.x)`). +#' +#' In addition to the defaults for all columns in `.unspecified`, several +#' summary measures have specific default rollup functions associated with them +#' that will be applied unless this is overridden by entries in `...`. For +#' example, `ess_bulk` has the default rollup function `"min"` instead of +#' `c("min", "max")`. `default_rollups()` gives the complete list of default +#' rollup functions. +#' +#' Calls to `rollup_summary()` can be chained, in which case subsequent +#' rollups will be applied only to variables that have not already been +#' rolled up. This makes it possible to provide different rollup functions +#' for different variables by combining chaining with the use of the +#' `.variable` parameter. +#' @returns +#' A named list of [`draws_summary`] objects; i.e. subclasses of [`tibble`], +#' with the following elements: +#' - `"unrolled"`: a [`draws_summary`] of the variables that were not rolled up. +#' - `"rolled"`: a [`draws_summary`] of the rolled-up variables. The second +#' column of this data frame, `"dims"`, gives the lengths of the dimensions +#' of each rolled up variable as a comma-separated string. The remaining +#' columns give each roll up of each summary measure; e.g. if `x` contained +#' a summary measure `"mean"` and it was rolled up using the `"min"` and +#' `"max"` functions (the default), the output will have a `"mean_min"` and +#' `"mean_max"` column. #' @examples -#' ds <- summarise_draws(example_draws()) -#' ds2 <- summarise_draws(2 * example_draws()) -#' ds2$variable <- c("pi", "upsilon", -#' "omega[1,1]", "omega[2,1]", "omega[3,1]", "omega[4,1]", -#' "omega[1,2]", "omega[2,2]", "omega[3,2]", "omega[4,2]") -#' draws_summary <- rbind(ds, ds2) -#' rollup_summary(draws_summary) -#' rollup_summary(draws_summary, rollup_vars = "theta") -#' rollup_summary(example_draws()) -NULL - -#' @rdname draws_summary_rollup +#' x <- example_draws() +#' +#' # default summaries show a row for every element in array-like variables +#' summarise_draws(x) +#' +#' # you can roll up summaries of array-like variables by rolling up draws +#' # objects directly +#' rollup_summary(x) +#' +#' # or summarise draws objects first to pick the desired summary measures +#' ds <- summarise_draws(x, "mean", "sd") +#' rollup_summary(ds) +#' +#' # rollups work on variables of any dimension +#' x <- example_draws(example = "multi_normal") +#' rollup_summary(x) +#' +#' # you can roll up only some variables +#' rollup_summary(x, .variable = "Sigma") +#' +#' # you can also specify the rollup functions to apply to each function +#' rollup_summary(x, "Sigma", mean = "mean", median = "min") +#' +#' # to apply a particular function or functions to all summaries, pass them +#' # to .unspecified and set .default to NULL: +#' rollup_summary(x, .unspecified = "median", .default = NULL) +#' +#' @examplesIf getRversion() > "4.1" +#' # rollups can be chained to provide different rollup functions to +#' # different variables +#' x |> +#' summarise_draws("mean", "sd") |> +#' rollup_summary("mu", sd = "min") |> +#' rollup_summary("Sigma", sd = "max") #' @export -rollup_summary <- function(x, rollup_vars = NULL, - min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") { +rollup_summary <- function(.x, ...) { UseMethod("rollup_summary") } -#' @rdname draws_summary_rollup +#' @rdname rollup_summary #' @export -rollup_summary.default <- function(x, rollup_vars = NULL, - min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") { - rollup_summary(summarise_draws(x), rollup_vars = rollup_vars, - min_only = min_only, - max_only = max_only) +rollup_summary.default <- function(.x, ...) { + rollup_summary(summarise_draws(.x), ...) } -#' @rdname draws_summary_rollup +#' @rdname rollup_summary #' @export -rollup_summary.draws_summary <- function (x, rollup_vars = NULL, - min_only = c("ess_bulk", "ess_tail"), - max_only = "rhat") { - # get variable names - vars <- draws_summary$variable - # Determine which variable names need to be rolled up - if (is.null(rollup_vars)) { - vars_nonscalar <- grepl("\\[", vars) +rollup_summary.data.frame <- function ( + .x, + .variable = NULL, + ..., + .default = default_rollups(), + .unspecified = c("min", "max") +) { + funs <- list(...) + + # apply the measure-specific default rollup functions to any columns not + # overridden by the user + missing_default_funs <- setdiff(names(.default), names(funs)) + funs[missing_default_funs] <- .default[missing_default_funs] + + # apply the generic default rollup functions to any remaining unspecified columns + funs[setdiff(names(.x), names(funs))] <- list(.unspecified) + + # turn the function specifications into named lists of functions + funs <- lapply(funs, function(fun_list) inject(create_function_list(!!!fun_list))) + + # determine the variables to roll up + vars <- split_variable_names(.x$variable) + if (is.null(.variable)) { + rollup_rows <- nzchar(vars$indices) } else { - vars_nonscalar <- as.logical(colSums(do.call(rbind, - lapply(paste0("^", rollup_vars, "\\["), - function(x){grepl(x, vars)})))) + rollup_rows <- vars$base_name %in% .variable } - # Separate out draws_summary into the scalar variables to leave alone and the nonscalar - # variables for rollup - ds_scalar <- draws_summary[!vars_nonscalar, ] - ds_nonscalar <- draws_summary[vars_nonscalar, ] - # Roll up the nonscalar variables - varnames_nonscalar <- gsub("\\[(.*)", "", ds_nonscalar$variable) - summary_names <- names(draws_summary)[-1] - names_minmax <- summary_names[!(summary_names %in% c(min_only, max_only))] - split_nonscalar <- split(ds_nonscalar, varnames_nonscalar)[unique(varnames_nonscalar)] - # [unique(varnames_nonscalar)] preserves the order of the names - min_max <- do.call(rbind, lapply(split_nonscalar, rollup_helper_minmax, - names = names_minmax)) - min_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_min, names = min_only)) - max_only <- do.call(rbind, lapply(split_nonscalar, rollup_helper_max, names = max_only)) - variable_column <- data.frame("variable" = unique(varnames_nonscalar)) - variable_indices <- parse_variable_indices(ds_nonscalar$variable) - dimension_column <- data.frame("dimension" = paste0("(", - sapply(variable_indices, function(x){paste(x$dimensions, collapse = ",")}), - ")")) - nonscalar_out <- tibble::as_tibble(cbind(variable_column, dimension_column, min_max, max_only, min_only)) - out <- list(unrolled_vars = ds_scalar, rolled_vars = nonscalar_out) - out + variable_col <- which(names(.x) == "variable") + vars <- vars[rollup_rows, ] + + # split the input df by variable base name and roll up the summaries + var_groups <- vctrs::vec_split(cbind(vars, .x[rollup_rows, -variable_col]), vars$base_name) + rolled_up_vars <- lapply(var_groups$val, function(x) { + indices <- split_indices_to_df(x$indices) + rolled_up_cols <- do.call(cbind, lapply(seq_along(x)[c(-1,-2)], function(col_i) { + col <- x[[col_i]] + col_name <- names(x)[[col_i]] + rolled_up_col <- lapply(funs[[col_name]], function(f) f(col)) + names(rolled_up_col) <- paste0(col_name, "_", names(rolled_up_col)) + vctrs::new_data_frame(rolled_up_col, n = 1L) + })) + cbind( + variable = x$base_name[[1]], + dims = paste0(lengths(lapply(indices, unique)), collapse = ","), + rolled_up_cols + ) + }) + + new_rollup_summary( + unrolled = .x[!rollup_rows, ], + rolled = do.call(rbind, rolled_up_vars) + ) } -rollup_helper_minmax <- function(x, names){ - x <- x[, names] - mm <- c(apply(x, 2, function(x) {c(min(x), max(x))})) - names(mm) <- paste0(rep(names(x), each = 2), c("_min", "_max")) - mm +#' @rdname rollup_summary +#' @export +rollup_summary.rollup_summary <- function (.x, ...) { + out <- rollup_summary(.x$unrolled, ...) + new_rollup_summary( + unrolled = out$unrolled, + rolled = vctrs::vec_rbind(.x$rolled, out$rolled) + ) } -rollup_helper_min <- function(x, names){ - x <- x[, names] - min_only <- apply(x, 2, min) - names(min_only) <- paste0(names(x), "_min") - min_only +new_rollup_summary <- function(unrolled, rolled) { + assert_data_frame(unrolled) + if (!inherits(unrolled, "draws_summary")) class(unrolled) <- class_draws_summary() + assert_data_frame(rolled) + if (!inherits(rolled, "draws_summary")) class(rolled) <- class_draws_summary() + + structure( + list(unrolled = unrolled, rolled = rolled), + class = class_rollup_summary() + ) } -rollup_helper_max <- function(x, names){ - x <- x[, names] - max_only <- apply(x, 2, max) - names(max_only) <- paste0(names(x), "_max") - max_only +class_rollup_summary <- function() { + c("rollup_summary", "list") +} + +#' @export +print.rollup_summary <- function(x, ..., color = TRUE) { + color <- as_one_logical(color) + if (color) { + subtle <- pillar::style_subtle + } else { + subtle <- identity + } + + cat(":\n\n") + if (NROW(x$unrolled) > 0) { + cat("$unrolled", subtle("(variables that have not been rolled up):"), "\n") + print(x$unrolled, ...) + cat("\n") + } + if (NROW(x$rolled) > 0) { + cat("$rolled", subtle("(variables that have been rolled up):"), "\n") + print(x$rolled, ...) + cat("\n") + } + invisible(x) +} + +#' @rdname rollup_summary +#' @export +default_rollups <- function() { + list( + ess_basic = "min", + ess_bulk = "min", + ess_mean = "min", + ess_median = "min", + ess_quantile = "min", + ess_sd = "min", + ess_tail = "min", + rhat = "max", + rhat_basic = "max", + rhat_nested = "max" + ) } diff --git a/R/summarise_draws.R b/R/summarise_draws.R index 6f13755a..55b4581e 100644 --- a/R/summarise_draws.R +++ b/R/summarise_draws.R @@ -117,41 +117,9 @@ summarise_draws.draws <- function( if (.cores <= 0) { stop_no_call("'.cores' must be a positive integer.") } - funs <- as.list(c(...)) .args <- as.list(.args) - if (length(funs)) { - if (is.null(names(funs))) { - # ensure names are initialized properly - names(funs) <- rep("", length(funs)) - } - calls <- substitute(list(...))[-1] - calls <- ulapply(calls, deparse_pretty) - for (i in seq_along(funs)) { - fname <- NULL - if (is.character(funs[[i]])) { - fname <- as_one_character(funs[[i]]) - } - # label unnamed arguments via their calls - if (!nzchar(names(funs)[i])) { - if (!is.null(fname)) { - names(funs)[i] <- fname - } else { - names(funs)[i] <- calls[i] - } - } - # get functions passed as strings from the right environments - if (!is.null(fname)) { - if (exists(fname, envir = caller_env())) { - env <- caller_env() - } else if (fname %in% getNamespaceExports("posterior")) { - env <- asNamespace("posterior") - } else { - stop_no_call("Cannot find function '", fname, "'.") - } - } - funs[[i]] <- rlang::as_function(funs[[i]], env = env) - } - } else { + funs <- create_function_list(...) + if (length(funs) == 0) { # default functions funs <- list( mean = base::mean, @@ -327,6 +295,42 @@ empty_draws_summary <- function(dimensions = "variable") { } +create_function_list <- function(..., .env = caller_env(2)) { + funs <- as.list(c(...)) + if (is.null(names(funs))) { + # ensure names are initialized properly + names(funs) <- rep("", length(funs)) + } + calls <- substitute(list(...))[-1] + calls <- ulapply(calls, deparse_pretty) + for (i in seq_along(funs)) { + fname <- NULL + if (is.character(funs[[i]])) { + fname <- as_one_character(funs[[i]]) + } + # label unnamed arguments via their calls + if (!nzchar(names(funs)[i])) { + if (!is.null(fname)) { + names(funs)[i] <- fname + } else { + names(funs)[i] <- calls[i] + } + } + # get functions passed as strings from the right environments + if (!is.null(fname)) { + if (exists(fname, envir = .env)) { + env <- .env + } else if (fname %in% getNamespaceExports("posterior")) { + env <- asNamespace("posterior") + } else { + stop_no_call("Cannot find function '", fname, "'.") + } + } + funs[[i]] <- rlang::as_function(funs[[i]], env = env) + } + funs +} + create_summary_list <- function(x, v, funs, .args) { draws <- drop_dims_or_classes(x[, , v], dims = 3, reset_class = FALSE) args <- c(list(draws), .args) diff --git a/R/variable-indices.R b/R/variable-indices.R index 7965044f..20b25853 100644 --- a/R/variable-indices.R +++ b/R/variable-indices.R @@ -114,6 +114,23 @@ split_indices <- function(x) { strsplit(substr(x, 2, nchar(x) - 1), ",", fixed = TRUE) } +#' Given a vector of index strings of all the same length (such as returned by +#' `split_variable_names(x)$indices` for all variables with the same base name), +#' split each index string into a character vector of indices and form them +#' into a data frame. +#' @param x a character vector of index strings that all have the same number +#' of dimensions, +#' e.g. `c("[1,1]", "[1,2]", "[1,3]")` +#' @returns a data frame with `length(x)` rows and number of columns equal to +#' the number of dimensions in the variables. Throws an error if the number +#' of dimensions are not all equal. +#' @noRd +split_indices_to_df <- function(x) { + indices = split_indices(x) + stopifnot(all(lengths(indices) == lengths(indices[1]))) + as.data.frame(do.call(rbind, indices), stringsAsFactors = FALSE) +} + # manipulating flattened variable names ----------------------------------- diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd new file mode 100644 index 00000000..cffc296e --- /dev/null +++ b/man/rollup_summary.Rd @@ -0,0 +1,122 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rollup_summary.R +\name{rollup_summary} +\alias{rollup_summary} +\alias{rollup_summary.default} +\alias{rollup_summary.data.frame} +\alias{rollup_summary.rollup_summary} +\alias{default_rollups} +\title{"Roll up" \code{draws_summary} objects by collapsing over non-scalar parameters.} +\usage{ +rollup_summary(.x, ...) + +\method{rollup_summary}{default}(.x, ...) + +\method{rollup_summary}{data.frame}( + .x, + .variable = NULL, + ..., + .default = default_rollups(), + .unspecified = c("min", "max") +) + +\method{rollup_summary}{rollup_summary}(.x, ...) + +default_rollups() +} +\arguments{ +\item{.x}{a \code{draws_summary} object, a \code{\link{draws}} object, a \code{data.frame}, or an +object with a \code{\link[=summarise_draws]{summarise_draws()}} method.} + +\item{...}{a named arguments where each name is a summary measure (i.e. column) +in \code{.x} and the value is a character vector of function names or a named list +of functions giving the rollup functions to apply to the corresponding summary +measure.} + +\item{.variable}{(character vector) base names (without indices) of variables +to roll up. If \code{NULL} (the default), all variables with indices in their names +will be rolled up.} + +\item{.default}{(list) named list where names are summary measures in \code{.x} +and values are the default rollup functions to apply to those summary +measures unless overridden by \code{...}.} + +\item{.unspecified}{(character vector or list) default rollup functions to +apply to any summary measure (column) in \code{.x} that does not have its own +specific rollup functions given in \code{...} or \code{.default}.} +} +\value{ +A named list of \code{\link{draws_summary}} objects; i.e. subclasses of \code{\link{tibble}}, +with the following elements: +\itemize{ +\item \code{"unrolled"}: a \code{\link{draws_summary}} of the variables that were not rolled up. +\item \code{"rolled"}: a \code{\link{draws_summary}} of the rolled-up variables. The second +column of this data frame, \code{"dims"}, gives the lengths of the dimensions +of each rolled up variable as a comma-separated string. The remaining +columns give each roll up of each summary measure; e.g. if \code{x} contained +a summary measure \code{"mean"} and it was rolled up using the \code{"min"} and +\code{"max"} functions (the default), the output will have a \code{"mean_min"} and +\code{"mean_max"} column. +} +} +\description{ +"Rolls up" summaries of draws (e.g. as returned by \code{\link[=summarise_draws]{summarise_draws()}}). +By default, all variables containing indices (e.g. \code{"x[1]"}) are rolled up, +but the \code{.variable} parameter can be used to roll up specific variables only. +} +\details{ +If called without specifying additional rollup functions in \code{...}, +\code{rollup_summary()} will apply the functions provided in \code{.default} and +\code{.unspecified} to the columns in \code{.x} (or, if \code{.x} is not a \code{data.frame}, +to the result of \code{summarise_draws(.x)}). + +In addition to the defaults for all columns in \code{.unspecified}, several +summary measures have specific default rollup functions associated with them +that will be applied unless this is overridden by entries in \code{...}. For +example, \code{ess_bulk} has the default rollup function \code{"min"} instead of +\code{c("min", "max")}. \code{default_rollups()} gives the complete list of default +rollup functions. + +Calls to \code{rollup_summary()} can be chained, in which case subsequent +rollups will be applied only to variables that have not already been +rolled up. This makes it possible to provide different rollup functions +for different variables by combining chaining with the use of the +\code{.variable} parameter. +} +\examples{ +x <- example_draws() + +# default summaries show a row for every element in array-like variables +summarise_draws(x) + +# you can roll up summaries of array-like variables by rolling up draws +# objects directly +rollup_summary(x) + +# or summarise draws objects first to pick the desired summary measures +ds <- summarise_draws(x, "mean", "sd") +rollup_summary(ds) + +# rollups work on variables of any dimension +x <- example_draws(example = "multi_normal") +rollup_summary(x) + +# you can roll up only some variables +rollup_summary(x, .variable = "Sigma") + +# you can also specify the rollup functions to apply to each function +rollup_summary(x, "Sigma", mean = "mean", median = "min") + +# to apply a particular function or functions to all summaries, pass them +# to .unspecified and set .default to NULL: +rollup_summary(x, .unspecified = "median", .default = NULL) + +\dontshow{if (getRversion() > "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# rollups can be chained to provide different rollup functions to +# different variables +x |> + summarise_draws("mean", "sd") |> + rollup_summary("mu", sd = "min") |> + rollup_summary("Sigma", sd = "max") +\dontshow{\}) # examplesIf} +} From 3439abfb4dc8e522eb1b128810a9dbacb4380596 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Thu, 1 Feb 2024 16:48:44 -0600 Subject: [PATCH 3/9] simplify method for passing rollup funs for unspecified summaries --- R/rollup_summary.R | 147 ++++++++++++++++++++++++------------------ man/rollup_summary.Rd | 121 ++++++++++++++++++---------------- 2 files changed, 152 insertions(+), 116 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index d8f6de60..63456d1b 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -1,53 +1,66 @@ -#' "Roll up" `draws_summary` objects by collapsing over non-scalar parameters. +#' Roll up `draws_summary` objects by collapsing summaries of non-scalar parameters. #' -#' "Rolls up" summaries of draws (e.g. as returned by [summarise_draws()]). -#' By default, all variables containing indices (e.g. `"x[1]"`) are rolled up, -#' but the `.variable` parameter can be used to roll up specific variables only. +#' Roll up summaries of draws (e.g. as returned by [summarise_draws()]); that +#' is, summarise the summaries. By default, summaries of all variables containing +#' indices (e.g. `"x[1]"`) are rolled up, but the `variable` parameter can be +#' used to roll up specific variables only. #' -#' @param .x a `draws_summary` object, a [`draws`] object, a `data.frame`, or an -#' object with a [summarise_draws()] method. -#' @param .variable (character vector) base names (without indices) of variables +#' @param .x (multiple options) The object containing summaries to roll up. One of: +#' - a [`draws_summary`] object such as produced by [summarise_draws()]. +#' - a `data.frame` with a `"variable"` column giving the names of variables, +#' where all other columns are numeric summaries of those variables. +#' - an object with a [summarise_draws()] method, such as a [`draws`] object, +#' in which case [summarise_draws()] will be called on `.x` and the result +#' will be rolled up. +#' - a [`rollup_summary`] object such as produced by `rollup_summary()`, in +#' which case variables that have not been rolled up yet may be rolled up. +#' @param variable (character vector) base names (without indices) of variables #' to roll up. If `NULL` (the default), all variables with indices in their names -#' will be rolled up. -#' @param ... a named arguments where each name is a summary measure (i.e. column) -#' in `.x` and the value is a character vector of function names or a named list -#' of functions giving the rollup functions to apply to the corresponding summary -#' measure. -#' @param .default (list) named list where names are summary measures in `.x` +#' (e.g. `"x[1,2]"`) will be rolled up. +#' @param ... (multiple options) arguments where the name of each argument is a +#' summary measure (i.e. column) in `.x` and the value is the rollup functions +#' to apply to that summary measure, specified as one of: +#' - bare name of a function +#' - a character vector of function names (optionally named). +#' - a named list of strings or functions. +#' +#' Unnamed arguments in `...` specify default rollup functions to apply to all +#' summary measures that do not have specific rollup functions given in `...` or +#' `.funs`. +#' @param .funs (list) named list where names are summary measures in `.x` #' and values are the default rollup functions to apply to those summary -#' measures unless overridden by `...`. -#' @param .unspecified (character vector or list) default rollup functions to -#' apply to any summary measure (column) in `.x` that does not have its own -#' specific rollup functions given in `...` or `.default`. +#' measures, unless overridden by `...`. As in `...`, unnamed elements of this +#' list give default rollup functions to apply to summary measures that do not +#' have specific rollup functions given in `...` or `.funs`. #' @details #' If called without specifying additional rollup functions in `...`, -#' `rollup_summary()` will apply the functions provided in `.default` and -#' `.unspecified` to the columns in `.x` (or, if `.x` is not a `data.frame`, -#' to the result of `summarise_draws(.x)`). +#' `rollup_summary()` will apply the default rollup functions as determined by +#' `.funs` to the columns in `.x` (or, if `.x` is not a `data.frame`, to the +#' result of `summarise_draws(.x)`). #' -#' In addition to the defaults for all columns in `.unspecified`, several -#' summary measures have specific default rollup functions associated with them -#' that will be applied unless this is overridden by entries in `...`. For -#' example, `ess_bulk` has the default rollup function `"min"` instead of -#' `c("min", "max")`. `default_rollups()` gives the complete list of default -#' rollup functions. +#' The default value of `.funs` provides several default rollup functions +#' that will be applied to specific summary measures, unless this is overridden +#' by entries in `...`. For example, `ess_bulk` has the default +#' rollup function `"min"` instead of `c("min", "max")`, as the minimum +#' effective sample size is likely of more interest than the maximum. +#' `default_rollups()` gives the complete list of default rollup functions. #' #' Calls to `rollup_summary()` can be chained, in which case subsequent #' rollups will be applied only to variables that have not already been -#' rolled up. This makes it possible to provide different rollup functions -#' for different variables by combining chaining with the use of the -#' `.variable` parameter. +#' rolled up (i.e. the `"unrolled"` element; see the description of +#' `rollup_summary` objects below). This makes it possible to provide different +#' rollup functions for different variables by calling `rollup_summary()` +#' multiple times with different values of the `variable` parameter. #' @returns -#' A named list of [`draws_summary`] objects; i.e. subclasses of [`tibble`], -#' with the following elements: +#' A `rollup_summary` object, which is a named list of [`draws_summary`] objects: #' - `"unrolled"`: a [`draws_summary`] of the variables that were not rolled up. #' - `"rolled"`: a [`draws_summary`] of the rolled-up variables. The second -#' column of this data frame, `"dims"`, gives the lengths of the dimensions -#' of each rolled up variable as a comma-separated string. The remaining -#' columns give each roll up of each summary measure; e.g. if `x` contained -#' a summary measure `"mean"` and it was rolled up using the `"min"` and -#' `"max"` functions (the default), the output will have a `"mean_min"` and -#' `"mean_max"` column. +#' column of this data frame, `"dim"`, gives the lengths of the dimensions +#' of each rolled up variable as a comma-separated character vector. The +#' remaining columns give the rollups of each summary measure; e.g. if `x` +#' contained a summary measure `"mean"` and it was rolled up using the `"min"` +#' and `"max"` functions (the default), the output will have a `"mean_min"` +#' and `"mean_max"` column. #' @examples #' x <- example_draws() #' @@ -55,11 +68,13 @@ #' summarise_draws(x) #' #' # you can roll up summaries of array-like variables by rolling up draws -#' # objects directly +#' # objects directly; this will apply the default options of summarise_draws() #' rollup_summary(x) #' #' # or summarise draws objects first to pick the desired summary measures -#' ds <- summarise_draws(x, "mean", "sd") +#' # (note that ess_bulk is only rolled up using min by default; see the +#' # .default parameter) +#' ds <- summarise_draws(x, "mean", "sd", "ess_bulk") #' rollup_summary(ds) #' #' # rollups work on variables of any dimension @@ -67,22 +82,21 @@ #' rollup_summary(x) #' #' # you can roll up only some variables -#' rollup_summary(x, .variable = "Sigma") +#' rollup_summary(x, variable = "Sigma") #' -#' # you can also specify the rollup functions to apply to each function -#' rollup_summary(x, "Sigma", mean = "mean", median = "min") +#' # you can specify the rollup functions to apply to all summaries ... +#' rollup_summary(x, "mean", "min") #' -#' # to apply a particular function or functions to all summaries, pass them -#' # to .unspecified and set .default to NULL: -#' rollup_summary(x, .unspecified = "median", .default = NULL) +#' # ... or specify the rollup functions to apply to specific summaries +#' rollup_summary(x, mean = "sd", median = "min") #' #' @examplesIf getRversion() > "4.1" #' # rollups can be chained to provide different rollup functions to #' # different variables #' x |> #' summarise_draws("mean", "sd") |> -#' rollup_summary("mu", sd = "min") |> -#' rollup_summary("Sigma", sd = "max") +#' rollup_summary(variable = "mu", sd = "min") |> +#' rollup_summary(variable = "Sigma", sd = "max") #' @export rollup_summary <- function(.x, ...) { UseMethod("rollup_summary") @@ -98,30 +112,40 @@ rollup_summary.default <- function(.x, ...) { #' @export rollup_summary.data.frame <- function ( .x, - .variable = NULL, ..., - .default = default_rollups(), - .unspecified = c("min", "max") + variable = NULL, + .funs = default_rollups() ) { - funs <- list(...) + rollup_funs <- lapply(list(...), function(fun_list) inject(create_function_list(!!!fun_list))) + default_rollup_funs <- lapply(.funs, function(fun_list) inject(create_function_list(!!!fun_list))) + + is_unnamed <- rlang::names2(rollup_funs) == "" + if (any(is_unnamed)) { + # user provided unnamed functions in dots, use these for summary measures + # that otherwise don't have a rollup function specified + unspecified_rollup_funs <- do.call(c, rollup_funs[is_unnamed]) + rollup_fund <- rollup_funs[!is_unnamed] + } else { + # use the default unspecified rollup funs + is_unnamed <- rlang::names2(default_rollup_funs) == "" + unspecified_rollup_funs <- do.call(c, default_rollup_funs[is_unnamed]) + default_rollup_funs <- default_rollup_funs[!is_unnamed] + } # apply the measure-specific default rollup functions to any columns not # overridden by the user - missing_default_funs <- setdiff(names(.default), names(funs)) - funs[missing_default_funs] <- .default[missing_default_funs] + missing_default_funs <- setdiff(names(default_rollup_funs), names(rollup_funs)) + rollup_funs[missing_default_funs] <- default_rollup_funs[missing_default_funs] # apply the generic default rollup functions to any remaining unspecified columns - funs[setdiff(names(.x), names(funs))] <- list(.unspecified) - - # turn the function specifications into named lists of functions - funs <- lapply(funs, function(fun_list) inject(create_function_list(!!!fun_list))) + rollup_funs[setdiff(names(.x), names(rollup_funs))] <- list(unspecified_rollup_funs) # determine the variables to roll up vars <- split_variable_names(.x$variable) - if (is.null(.variable)) { + if (is.null(variable)) { rollup_rows <- nzchar(vars$indices) } else { - rollup_rows <- vars$base_name %in% .variable + rollup_rows <- vars$base_name %in% variable } variable_col <- which(names(.x) == "variable") vars <- vars[rollup_rows, ] @@ -133,13 +157,13 @@ rollup_summary.data.frame <- function ( rolled_up_cols <- do.call(cbind, lapply(seq_along(x)[c(-1,-2)], function(col_i) { col <- x[[col_i]] col_name <- names(x)[[col_i]] - rolled_up_col <- lapply(funs[[col_name]], function(f) f(col)) + rolled_up_col <- lapply(rollup_funs[[col_name]], function(f) f(col)) names(rolled_up_col) <- paste0(col_name, "_", names(rolled_up_col)) vctrs::new_data_frame(rolled_up_col, n = 1L) })) cbind( variable = x$base_name[[1]], - dims = paste0(lengths(lapply(indices, unique)), collapse = ","), + dim = paste0(lengths(lapply(indices, unique)), collapse = ","), rolled_up_cols ) }) @@ -203,6 +227,7 @@ print.rollup_summary <- function(x, ..., color = TRUE) { #' @export default_rollups <- function() { list( + c("min", "max"), ess_basic = "min", ess_bulk = "min", ess_mean = "min", diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd index cffc296e..9f29cbe1 100644 --- a/man/rollup_summary.Rd +++ b/man/rollup_summary.Rd @@ -6,82 +6,92 @@ \alias{rollup_summary.data.frame} \alias{rollup_summary.rollup_summary} \alias{default_rollups} -\title{"Roll up" \code{draws_summary} objects by collapsing over non-scalar parameters.} +\title{Roll up \code{draws_summary} objects by collapsing summaries of non-scalar parameters.} \usage{ rollup_summary(.x, ...) \method{rollup_summary}{default}(.x, ...) -\method{rollup_summary}{data.frame}( - .x, - .variable = NULL, - ..., - .default = default_rollups(), - .unspecified = c("min", "max") -) +\method{rollup_summary}{data.frame}(.x, ..., variable = NULL, .funs = default_rollups()) \method{rollup_summary}{rollup_summary}(.x, ...) default_rollups() } \arguments{ -\item{.x}{a \code{draws_summary} object, a \code{\link{draws}} object, a \code{data.frame}, or an -object with a \code{\link[=summarise_draws]{summarise_draws()}} method.} +\item{.x}{(multiple options) The object containing summaries to roll up. One of: +\itemize{ +\item a \code{\link{draws_summary}} object such as produced by \code{\link[=summarise_draws]{summarise_draws()}}. +\item a \code{data.frame} with a \code{"variable"} column giving the names of variables, +where all other columns are numeric summaries of those variables. +\item an object with a \code{\link[=summarise_draws]{summarise_draws()}} method, such as a \code{\link{draws}} object, +in which case \code{\link[=summarise_draws]{summarise_draws()}} will be called on \code{.x} and the result +will be rolled up. +\item a \code{\link{rollup_summary}} object such as produced by \code{rollup_summary()}, in +which case variables that have not been rolled up yet may be rolled up. +}} + +\item{...}{(multiple options) arguments where the name of each argument is a +summary measure (i.e. column) in \code{.x} and the value is the rollup functions +to apply to that summary measure, specified as one of: +\itemize{ +\item bare name of a function +\item a character vector of function names (optionally named). +\item a named list of strings or functions. +} -\item{...}{a named arguments where each name is a summary measure (i.e. column) -in \code{.x} and the value is a character vector of function names or a named list -of functions giving the rollup functions to apply to the corresponding summary -measure.} +Unnamed arguments in \code{...} specify default rollup functions to apply to all +summary measures that do not have specific rollup functions given in \code{...} or +\code{.funs}.} -\item{.variable}{(character vector) base names (without indices) of variables +\item{variable}{(character vector) base names (without indices) of variables to roll up. If \code{NULL} (the default), all variables with indices in their names -will be rolled up.} +(e.g. \code{"x[1,2]"}) will be rolled up.} -\item{.default}{(list) named list where names are summary measures in \code{.x} +\item{.funs}{(list) named list where names are summary measures in \code{.x} and values are the default rollup functions to apply to those summary -measures unless overridden by \code{...}.} - -\item{.unspecified}{(character vector or list) default rollup functions to -apply to any summary measure (column) in \code{.x} that does not have its own -specific rollup functions given in \code{...} or \code{.default}.} +measures, unless overridden by \code{...}. As in \code{...}, unnamed elements of this +list give default rollup functions to apply to summary measures that do not +have specific rollup functions given in \code{...} or \code{.funs}.} } \value{ -A named list of \code{\link{draws_summary}} objects; i.e. subclasses of \code{\link{tibble}}, -with the following elements: +A \code{rollup_summary} object, which is a named list of \code{\link{draws_summary}} objects: \itemize{ \item \code{"unrolled"}: a \code{\link{draws_summary}} of the variables that were not rolled up. \item \code{"rolled"}: a \code{\link{draws_summary}} of the rolled-up variables. The second -column of this data frame, \code{"dims"}, gives the lengths of the dimensions -of each rolled up variable as a comma-separated string. The remaining -columns give each roll up of each summary measure; e.g. if \code{x} contained -a summary measure \code{"mean"} and it was rolled up using the \code{"min"} and -\code{"max"} functions (the default), the output will have a \code{"mean_min"} and -\code{"mean_max"} column. +column of this data frame, \code{"dim"}, gives the lengths of the dimensions +of each rolled up variable as a comma-separated character vector. The +remaining columns give the rollups of each summary measure; e.g. if \code{x} +contained a summary measure \code{"mean"} and it was rolled up using the \code{"min"} +and \code{"max"} functions (the default), the output will have a \code{"mean_min"} +and \code{"mean_max"} column. } } \description{ -"Rolls up" summaries of draws (e.g. as returned by \code{\link[=summarise_draws]{summarise_draws()}}). -By default, all variables containing indices (e.g. \code{"x[1]"}) are rolled up, -but the \code{.variable} parameter can be used to roll up specific variables only. +Roll up summaries of draws (e.g. as returned by \code{\link[=summarise_draws]{summarise_draws()}}); that +is, summarise the summaries. By default, summaries of all variables containing +indices (e.g. \code{"x[1]"}) are rolled up, but the \code{variable} parameter can be +used to roll up specific variables only. } \details{ If called without specifying additional rollup functions in \code{...}, -\code{rollup_summary()} will apply the functions provided in \code{.default} and -\code{.unspecified} to the columns in \code{.x} (or, if \code{.x} is not a \code{data.frame}, -to the result of \code{summarise_draws(.x)}). +\code{rollup_summary()} will apply the default rollup functions as determined by +\code{.funs} to the columns in \code{.x} (or, if \code{.x} is not a \code{data.frame}, to the +result of \code{summarise_draws(.x)}). -In addition to the defaults for all columns in \code{.unspecified}, several -summary measures have specific default rollup functions associated with them -that will be applied unless this is overridden by entries in \code{...}. For -example, \code{ess_bulk} has the default rollup function \code{"min"} instead of -\code{c("min", "max")}. \code{default_rollups()} gives the complete list of default -rollup functions. +The default value of \code{.funs} provides several default rollup functions +that will be applied to specific summary measures, unless this is overridden +by entries in \code{...}. For example, \code{ess_bulk} has the default +rollup function \code{"min"} instead of \code{c("min", "max")}, as the minimum +effective sample size is likely of more interest than the maximum. +\code{default_rollups()} gives the complete list of default rollup functions. Calls to \code{rollup_summary()} can be chained, in which case subsequent rollups will be applied only to variables that have not already been -rolled up. This makes it possible to provide different rollup functions -for different variables by combining chaining with the use of the -\code{.variable} parameter. +rolled up (i.e. the \code{"unrolled"} element; see the description of +\code{rollup_summary} objects below). This makes it possible to provide different +rollup functions for different variables by calling \code{rollup_summary()} +multiple times with different values of the \code{variable} parameter. } \examples{ x <- example_draws() @@ -90,11 +100,13 @@ x <- example_draws() summarise_draws(x) # you can roll up summaries of array-like variables by rolling up draws -# objects directly +# objects directly; this will apply the default options of summarise_draws() rollup_summary(x) # or summarise draws objects first to pick the desired summary measures -ds <- summarise_draws(x, "mean", "sd") +# (note that ess_bulk is only rolled up using min by default; see the +# .default parameter) +ds <- summarise_draws(x, "mean", "sd", "ess_bulk") rollup_summary(ds) # rollups work on variables of any dimension @@ -102,21 +114,20 @@ x <- example_draws(example = "multi_normal") rollup_summary(x) # you can roll up only some variables -rollup_summary(x, .variable = "Sigma") +rollup_summary(x, variable = "Sigma") -# you can also specify the rollup functions to apply to each function -rollup_summary(x, "Sigma", mean = "mean", median = "min") +# you can specify the rollup functions to apply to all summaries ... +rollup_summary(x, "mean", "min") -# to apply a particular function or functions to all summaries, pass them -# to .unspecified and set .default to NULL: -rollup_summary(x, .unspecified = "median", .default = NULL) +# ... or specify the rollup functions to apply to specific summaries +rollup_summary(x, mean = "sd", median = "min") \dontshow{if (getRversion() > "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # rollups can be chained to provide different rollup functions to # different variables x |> summarise_draws("mean", "sd") |> - rollup_summary("mu", sd = "min") |> - rollup_summary("Sigma", sd = "max") + rollup_summary(variable = "mu", sd = "min") |> + rollup_summary(variable = "Sigma", sd = "max") \dontshow{\}) # examplesIf} } From 6d36e9e16acda8f444bd54e6eace392ee44715ef Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 00:19:41 -0600 Subject: [PATCH 4/9] tests for rollup_summary --- NAMESPACE | 1 + NEWS.md | 2 + R/misc.R | 1 + R/rollup_summary.R | 45 +++++++++++++------ R/summarise_draws.R | 51 ++++++++++++++------- man/rollup_summary.Rd | 24 +++++++--- tests/testthat/test-rollup_summary.R | 64 +++++++++++++++++++++++++++ tests/testthat/test-summarise_draws.R | 6 +++ 8 files changed, 159 insertions(+), 35 deletions(-) create mode 100644 tests/testthat/test-rollup_summary.R diff --git a/NAMESPACE b/NAMESPACE index 2ebe5bbb..0426a993 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -290,6 +290,7 @@ S3method(rhat_nested,default) S3method(rhat_nested,rvar) S3method(rollup_summary,data.frame) S3method(rollup_summary,default) +S3method(rollup_summary,draws) S3method(rollup_summary,rollup_summary) S3method(sd,default) S3method(sd,rvar) diff --git a/NEWS.md b/NEWS.md index 03f5a1a8..e10ff33f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,8 @@ * For types that support `factor` variables (`draws_df`, `draws_list`, and `draws_rvars`), `extract_variable()` and `extract_variable_matrix()` can now return `factor`s. +* Add `rollup_summary()` function for rolling up summaries of variables with + indices (#43). # posterior 1.5.0 diff --git a/R/misc.R b/R/misc.R index c1bce76c..8f07274a 100644 --- a/R/misc.R +++ b/R/misc.R @@ -136,6 +136,7 @@ move_to_start <- function(x, start) { # prettily deparse an expression # @return a single character string deparse_pretty <- function(x, max_chars = NULL, max_wsp = 1L) { + if (rlang::is_quosure(x)) x <- rlang::get_expr(x) out <- collapse(deparse(x)) out <- rm_wsp(out, max_wsp) assert_int(max_chars, null.ok = TRUE) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 63456d1b..f46acd9b 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -14,19 +14,20 @@ #' will be rolled up. #' - a [`rollup_summary`] object such as produced by `rollup_summary()`, in #' which case variables that have not been rolled up yet may be rolled up. -#' @param variable (character vector) base names (without indices) of variables -#' to roll up. If `NULL` (the default), all variables with indices in their names -#' (e.g. `"x[1,2]"`) will be rolled up. #' @param ... (multiple options) arguments where the name of each argument is a #' summary measure (i.e. column) in `.x` and the value is the rollup functions #' to apply to that summary measure, specified as one of: #' - bare name of a function #' - a character vector of function names (optionally named). -#' - a named list of strings or functions. +#' - a function formula, as accepted by [rlang::as_function()]. +#' - a named list of any of the above. #' #' Unnamed arguments in `...` specify default rollup functions to apply to all #' summary measures that do not have specific rollup functions given in `...` or #' `.funs`. +#' @param variable (character vector) base names (without indices) of variables +#' to roll up. If `NULL` (the default), all variables with indices in their names +#' (e.g. `"x[1,2]"`) will be rolled up. #' @param .funs (list) named list where names are summary measures in `.x` #' and values are the default rollup functions to apply to those summary #' measures, unless overridden by `...`. As in `...`, unnamed elements of this @@ -73,7 +74,7 @@ #' #' # or summarise draws objects first to pick the desired summary measures #' # (note that ess_bulk is only rolled up using min by default; see the -#' # .default parameter) +#' # .funs parameter) #' ds <- summarise_draws(x, "mean", "sd", "ess_bulk") #' rollup_summary(ds) #' @@ -84,13 +85,23 @@ #' # you can roll up only some variables #' rollup_summary(x, variable = "Sigma") #' -#' # you can specify the rollup functions to apply to all summaries ... +#' # you can specify the rollup functions to apply to all summaries by passing +#' # unnamed parameters ... #' rollup_summary(x, "mean", "min") #' -#' # ... or specify the rollup functions to apply to specific summaries +#' # ... or use names to specify rollup functions for specific summaries #' rollup_summary(x, mean = "sd", median = "min") #' -#' @examplesIf getRversion() > "4.1" +#' @examplesIf getRversion() < "4.1" +#' # you can pass parameters to rollup functions using anonymous functions +#' x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) +#' rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) +#' +#' @examplesIf getRversion() >= "4.1" +#' # you can pass parameters to rollup functions using anonymous functions +#' x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) +#' rollup_summary(x2, list(min = \(x) min(x, na.rm = TRUE))) +#' #' # rollups can be chained to provide different rollup functions to #' # different variables #' x |> @@ -108,6 +119,12 @@ rollup_summary.default <- function(.x, ...) { rollup_summary(summarise_draws(.x), ...) } +#' @rdname rollup_summary +#' @export +rollup_summary.draws <- function(.x, ...) { + rollup_summary(summarise_draws(.x), ...) +} + #' @rdname rollup_summary #' @export rollup_summary.data.frame <- function ( @@ -116,15 +133,15 @@ rollup_summary.data.frame <- function ( variable = NULL, .funs = default_rollups() ) { - rollup_funs <- lapply(list(...), function(fun_list) inject(create_function_list(!!!fun_list))) - default_rollup_funs <- lapply(.funs, function(fun_list) inject(create_function_list(!!!fun_list))) + rollup_funs <- lapply(rlang::enquos0(...), create_function_list) + default_rollup_funs <- lapply(.funs, create_function_list) is_unnamed <- rlang::names2(rollup_funs) == "" if (any(is_unnamed)) { # user provided unnamed functions in dots, use these for summary measures # that otherwise don't have a rollup function specified unspecified_rollup_funs <- do.call(c, rollup_funs[is_unnamed]) - rollup_fund <- rollup_funs[!is_unnamed] + rollup_funs <- rollup_funs[!is_unnamed] } else { # use the default unspecified rollup funs is_unnamed <- rlang::names2(default_rollup_funs) == "" @@ -151,14 +168,14 @@ rollup_summary.data.frame <- function ( vars <- vars[rollup_rows, ] # split the input df by variable base name and roll up the summaries - var_groups <- vctrs::vec_split(cbind(vars, .x[rollup_rows, -variable_col]), vars$base_name) + var_groups <- vctrs::vec_split(cbind(vars, .x[rollup_rows, -variable_col, drop = FALSE]), vars$base_name) rolled_up_vars <- lapply(var_groups$val, function(x) { indices <- split_indices_to_df(x$indices) rolled_up_cols <- do.call(cbind, lapply(seq_along(x)[c(-1,-2)], function(col_i) { col <- x[[col_i]] col_name <- names(x)[[col_i]] rolled_up_col <- lapply(rollup_funs[[col_name]], function(f) f(col)) - names(rolled_up_col) <- paste0(col_name, "_", names(rolled_up_col)) + names(rolled_up_col) <- sprintf("%s_%s", col_name, names(rolled_up_col)) vctrs::new_data_frame(rolled_up_col, n = 1L) })) cbind( @@ -169,7 +186,7 @@ rollup_summary.data.frame <- function ( }) new_rollup_summary( - unrolled = .x[!rollup_rows, ], + unrolled = .x[!rollup_rows, , drop = FALSE], rolled = do.call(rbind, rolled_up_vars) ) } diff --git a/R/summarise_draws.R b/R/summarise_draws.R index 55b4581e..4052c3a4 100644 --- a/R/summarise_draws.R +++ b/R/summarise_draws.R @@ -118,7 +118,7 @@ summarise_draws.draws <- function( stop_no_call("'.cores' must be a positive integer.") } .args <- as.list(.args) - funs <- create_function_list(...) + funs <- create_function_list(rlang::enquos0(...)) if (length(funs) == 0) { # default functions funs <- list( @@ -295,39 +295,58 @@ empty_draws_summary <- function(dimensions = "variable") { } -create_function_list <- function(..., .env = caller_env(2)) { - funs <- as.list(c(...)) +#' convert a specification for a list of functions (in various formats) into a +#' named list of functions +#' @param fun_exprs One of: +#' - a function. +#' - a character vector of names of functions that can be found either in `env` +#' or in the \pkg{posterior} namespace. +#' - an unevaluated expression or a quosure that represents a function +#' - an \pkg{rlang} function formula (a la [rlang::as_function()]). +#' - a list where each element is of the above. +#' @param env the environment to evaluate expressions in and to go searching for +#' functions specified as strings in. +#' @returns a named list of functions in `fun_expres` +#' @noRd +create_function_list <- function(fun_exprs, env = caller_env(2)) { + # flatten fun_exprs into two lists: funs, a list of functions/strings/formulas, + # and fun_exprs, a list of bare expressions or quosures + if (!is.list(fun_exprs)) fun_exprs <- list(fun_exprs) + funs <- lapply(fun_exprs, eval_tidy, env = env) + fun_exprs <- rep(fun_exprs, lengths(funs)) + funs <- as.list(do.call(c, funs)) + if (is.null(names(funs))) { # ensure names are initialized properly names(funs) <- rep("", length(funs)) } - calls <- substitute(list(...))[-1] - calls <- ulapply(calls, deparse_pretty) + for (i in seq_along(funs)) { fname <- NULL if (is.character(funs[[i]])) { fname <- as_one_character(funs[[i]]) } + # label unnamed arguments via their calls if (!nzchar(names(funs)[i])) { if (!is.null(fname)) { names(funs)[i] <- fname } else { - names(funs)[i] <- calls[i] + names(funs)[i] <- deparse_pretty(fun_exprs[[i]]) } } - # get functions passed as strings from the right environments - if (!is.null(fname)) { - if (exists(fname, envir = .env)) { - env <- .env - } else if (fname %in% getNamespaceExports("posterior")) { - env <- asNamespace("posterior") - } else { - stop_no_call("Cannot find function '", fname, "'.") - } + + # get the environment to find functions passed as strings in + env_i <- env + if (!is.null(fname) && !exists(fname, envir = env_i, mode = "function")) { + # if the function isn't in the calling environment fall back to the package + env_i <- asNamespace("posterior") } - funs[[i]] <- rlang::as_function(funs[[i]], env = env) + + funs[[i]] <- rlang::as_function(funs[[i]], env = env_i) } + + names(funs) <- make.unique(names(funs)) funs } diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd index 9f29cbe1..635e2c2e 100644 --- a/man/rollup_summary.Rd +++ b/man/rollup_summary.Rd @@ -3,6 +3,7 @@ \name{rollup_summary} \alias{rollup_summary} \alias{rollup_summary.default} +\alias{rollup_summary.draws} \alias{rollup_summary.data.frame} \alias{rollup_summary.rollup_summary} \alias{default_rollups} @@ -12,6 +13,8 @@ rollup_summary(.x, ...) \method{rollup_summary}{default}(.x, ...) +\method{rollup_summary}{draws}(.x, ...) + \method{rollup_summary}{data.frame}(.x, ..., variable = NULL, .funs = default_rollups()) \method{rollup_summary}{rollup_summary}(.x, ...) @@ -37,7 +40,8 @@ to apply to that summary measure, specified as one of: \itemize{ \item bare name of a function \item a character vector of function names (optionally named). -\item a named list of strings or functions. +\item a function formula, as accepted by \code{\link[rlang:as_function]{rlang::as_function()}}. +\item a named list of any of the above. } Unnamed arguments in \code{...} specify default rollup functions to apply to all @@ -105,7 +109,7 @@ rollup_summary(x) # or summarise draws objects first to pick the desired summary measures # (note that ess_bulk is only rolled up using min by default; see the -# .default parameter) +# .funs parameter) ds <- summarise_draws(x, "mean", "sd", "ess_bulk") rollup_summary(ds) @@ -116,13 +120,23 @@ rollup_summary(x) # you can roll up only some variables rollup_summary(x, variable = "Sigma") -# you can specify the rollup functions to apply to all summaries ... +# you can specify the rollup functions to apply to all summaries by passing +# unnamed parameters ... rollup_summary(x, "mean", "min") -# ... or specify the rollup functions to apply to specific summaries +# ... or use names to specify rollup functions for specific summaries rollup_summary(x, mean = "sd", median = "min") -\dontshow{if (getRversion() > "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (getRversion() < "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# you can pass parameters to rollup functions using anonymous functions +x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) +rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) +\dontshow{\}) # examplesIf} +\dontshow{if (getRversion() >= "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# you can pass parameters to rollup functions using anonymous functions +x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) +rollup_summary(x2, list(min = \(x) min(x, na.rm = TRUE))) + # rollups can be chained to provide different rollup functions to # different variables x |> diff --git a/tests/testthat/test-rollup_summary.R b/tests/testthat/test-rollup_summary.R new file mode 100644 index 00000000..4e87d6aa --- /dev/null +++ b/tests/testthat/test-rollup_summary.R @@ -0,0 +1,64 @@ +test_that("rollup_summary works correctly", { + set.seed(1234) + x_array <- as_draws_array(example_draws(example = "multi_normal")) + x_array <- mutate_variables(x_array, y = rnorm(ndraws(x_array))) + x <- as_draws_df(x_array) + + sum_x <- summarise_draws(x) + rollup <- rollup_summary(sum_x) + expect_equal(rollup, rollup_summary(sum_x)) + expect_equal(rollup, rollup_summary(x_array)) + + sum_x <- summarise_draws(x, "mean", "sd") + + rollup <- rollup_summary(sum_x) + expect_equal(rollup$unrolled, sum_x[sum_x$variable == "y", ]) + expect_equal(rollup$rolled$variable, c("mu", "Sigma")) + expect_equal(rollup$rolled$dim, c("3", "3,3")) + expect_equal(names(rollup$rolled), c("variable", "dim", "mean_min", "mean_max", "sd_min", "sd_max")) + expect_equal(rollup$rolled$mean_max[1], max(sum_x[startsWith(sum_x$variable, "mu"),"mean"])) + + rollup <- rollup_summary(sum_x, variable = "Sigma") + expect_equal(rollup$unrolled, sum_x[!startsWith(sum_x$variable, "Sigma"), ]) + expect_equal(rollup$rolled$variable, c("Sigma")) + expect_equal(rollup$rolled$dim, c("3,3")) + expect_equal(names(rollup$rolled), c("variable", "dim", "mean_min", "mean_max", "sd_min", "sd_max")) + expect_equal(rollup$rolled$mean_min, min(sum_x[startsWith(sum_x$variable, "Sigma"),]$mean)) + + rollup <- rollup_summary(sum_x, "mean", "min") + expect_equal(names(rollup$rolled), c("variable", "dim", "mean_mean", "mean_min", "sd_mean", "sd_min")) + expect_equal(rollup$rolled$mean_mean[1], mean(sum_x[startsWith(sum_x$variable, "mu"),]$mean)) + + rollup <- rollup_summary(sum_x, mean = c("median", "mean"), .funs = list(mean = "stop", sd = "min")) + expect_equal(names(rollup$rolled), c("variable", "dim", "mean_median", "mean_mean", "sd_min")) + expect_equal(rollup$rolled$mean_median[1], median(sum_x[startsWith(sum_x$variable, "mu"),]$mean)) + + x2 <- draws_rvars(x = c(rvar(matrix(1:20, ncol = 2)), NA)) + sum_x2 <- summarise_draws(x2, min, max) + rollup <- rollup_summary(sum_x2, list(min = function(x) min(x, na.rm = TRUE)), max) + expect_equal(rollup$rolled$variable, "x") + expect_equal(rollup$rolled$dim, "3") + expect_equal(rollup$rolled$min_min, 1) + expect_equal(rollup$rolled$min_max, NA_real_) + expect_equal(rollup$rolled$max_min, 10) + expect_equal(rollup$rolled$max_max, NA_real_) +}) + +test_that("chaining rollups works", { + set.seed(1234) + x <- example_draws(example = "multi_normal") + x <- mutate_variables(x, y = rnorm(ndraws(x))) + x <- as_draws_df(x) + + sum_x <- summarise_draws(x, "mean", "sd") + + rollup <- rollup_summary( + rollup_summary(sum_x, variable = "mu", sd = "min"), + variable = "Sigma", sd = "max" + ) + expect_equal(rollup$unrolled$variable, "y") + expect_equal(rollup$rolled$variable, c("mu", "Sigma")) + expect_equal(names(rollup$rolled), c("variable", "dim", "mean_min", "mean_max", "sd_min", "sd_max")) + expect_equal(rollup$rolled$sd_min, c(min(sum_x[startsWith(sum_x$variable, "mu"),]$sd), NA_real_)) + expect_equal(rollup$rolled$sd_max, c(NA_real_, max(sum_x[startsWith(sum_x$variable, "Sigma"),]$sd))) +}) diff --git a/tests/testthat/test-summarise_draws.R b/tests/testthat/test-summarise_draws.R index 80ed971e..6a823112 100644 --- a/tests/testthat/test-summarise_draws.R +++ b/tests/testthat/test-summarise_draws.R @@ -15,6 +15,12 @@ test_that("summarise_draws works correctly", { sum_x <- summarise_draws(x, ~quantile(.x, probs = c(0.4, 0.6))) expect_true(all(c("40%", "60%") %in% names(sum_x))) + sum_x <- summarise_draws(x, c("mcse_mean", "mean"), median) + expect_true(all(c("mcse_mean", "mean", "median") %in% names(sum_x))) + + sum_x <- summarise_draws(x, f = c("mcse_mean", "mean")) + expect_true(all(c("f1", "f2") %in% names(sum_x))) + x[1, 1] <- NA sum_x <- summarise_draws(x) expect_true(is.na(sum_x[1, "q5"])) From 2a1514a4cef5dfc64348a23ed5fbacf1ecdadb90 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 01:41:21 -0600 Subject: [PATCH 5/9] reduce benchmark time --- touchstone/script.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/touchstone/script.R b/touchstone/script.R index 39aae730..f8ee8164 100644 --- a/touchstone/script.R +++ b/touchstone/script.R @@ -38,7 +38,7 @@ for (dest_type in draws_types) { as_draws_dest(x) } }, - n = 50 + n = 20 ) } @@ -54,7 +54,7 @@ for (n_variables in c(10, 100)) { "summarise_draws_{n_variables}_variables" := { posterior::summarise_draws(x) }, - n = 50 + n = 20 ) } From 58e501a12e4c7b65017f871e03da63aadfc700f5 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 12:31:37 -0600 Subject: [PATCH 6/9] test coverage and check fixes --- DESCRIPTION | 2 +- R/rollup_summary.R | 12 ++++----- man/draws_array.Rd | 4 +-- man/draws_df.Rd | 4 +-- man/draws_list.Rd | 4 +-- man/draws_matrix.Rd | 4 +-- man/draws_rvars.Rd | 4 +-- man/ess_basic.Rd | 2 +- man/ess_bulk.Rd | 2 +- man/ess_quantile.Rd | 2 +- man/ess_sd.Rd | 2 +- man/ess_tail.Rd | 2 +- man/extract_variable_array.Rd | 4 +-- man/extract_variable_matrix.Rd | 4 +-- man/mcse_mean.Rd | 2 +- man/mcse_quantile.Rd | 2 +- man/mcse_sd.Rd | 2 +- man/pareto_diags.Rd | 2 +- man/pareto_khat.Rd | 2 +- man/rhat_basic.Rd | 2 +- man/rhat_nested.Rd | 2 +- man/rollup_summary.Rd | 7 +----- man/rstar.Rd | 4 +-- tests/testthat/test-rollup_summary.R | 36 +++++++++++++++++++++++++++ tests/testthat/test-summarise_draws.R | 17 +++++++++++++ 25 files changed, 89 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce462430..6bec29ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,5 +56,5 @@ LazyData: false URL: https://mc-stan.org/posterior/, https://discourse.mc-stan.org/ BugReports: https://github.com/stan-dev/posterior/issues Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/R/rollup_summary.R b/R/rollup_summary.R index f46acd9b..981cad64 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -92,16 +92,11 @@ #' # ... or use names to specify rollup functions for specific summaries #' rollup_summary(x, mean = "sd", median = "min") #' -#' @examplesIf getRversion() < "4.1" #' # you can pass parameters to rollup functions using anonymous functions #' x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) #' rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) #' #' @examplesIf getRversion() >= "4.1" -#' # you can pass parameters to rollup functions using anonymous functions -#' x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) -#' rollup_summary(x2, list(min = \(x) min(x, na.rm = TRUE))) -#' #' # rollups can be chained to provide different rollup functions to #' # different variables #' x |> @@ -133,6 +128,10 @@ rollup_summary.data.frame <- function ( variable = NULL, .funs = default_rollups() ) { + assert_multi_class(.x$variable, c("character", "factor")) + assert_character(variable, null.ok = TRUE) + assert_list(.funs, null.ok = TRUE) + rollup_funs <- lapply(rlang::enquos0(...), create_function_list) default_rollup_funs <- lapply(.funs, create_function_list) @@ -181,7 +180,8 @@ rollup_summary.data.frame <- function ( cbind( variable = x$base_name[[1]], dim = paste0(lengths(lapply(indices, unique)), collapse = ","), - rolled_up_cols + rolled_up_cols, + stringsAsFactors = FALSE ) }) diff --git a/man/draws_array.Rd b/man/draws_array.Rd index 359703ca..941cf85e 100755 --- a/man/draws_array.Rd +++ b/man/draws_array.Rd @@ -74,10 +74,10 @@ str(x2) } \seealso{ Other formats: +\code{\link{draws}}, \code{\link{draws_df}()}, \code{\link{draws_list}()}, \code{\link{draws_matrix}()}, -\code{\link{draws_rvars}()}, -\code{\link{draws}} +\code{\link{draws_rvars}()} } \concept{formats} diff --git a/man/draws_df.Rd b/man/draws_df.Rd index a34daffb..bbfc8206 100755 --- a/man/draws_df.Rd +++ b/man/draws_df.Rd @@ -96,10 +96,10 @@ print(xnew) } \seealso{ Other formats: +\code{\link{draws}}, \code{\link{draws_array}()}, \code{\link{draws_list}()}, \code{\link{draws_matrix}()}, -\code{\link{draws_rvars}()}, -\code{\link{draws}} +\code{\link{draws_rvars}()} } \concept{formats} diff --git a/man/draws_list.Rd b/man/draws_list.Rd index f45526a8..b696350e 100755 --- a/man/draws_list.Rd +++ b/man/draws_list.Rd @@ -76,10 +76,10 @@ str(x2) } \seealso{ Other formats: +\code{\link{draws}}, \code{\link{draws_array}()}, \code{\link{draws_df}()}, \code{\link{draws_matrix}()}, -\code{\link{draws_rvars}()}, -\code{\link{draws}} +\code{\link{draws_rvars}()} } \concept{formats} diff --git a/man/draws_matrix.Rd b/man/draws_matrix.Rd index 1b548412..432e3f9b 100755 --- a/man/draws_matrix.Rd +++ b/man/draws_matrix.Rd @@ -74,10 +74,10 @@ str(x2) } \seealso{ Other formats: +\code{\link{draws}}, \code{\link{draws_array}()}, \code{\link{draws_df}()}, \code{\link{draws_list}()}, -\code{\link{draws_rvars}()}, -\code{\link{draws}} +\code{\link{draws_rvars}()} } \concept{formats} diff --git a/man/draws_rvars.Rd b/man/draws_rvars.Rd index 0e4b614a..28786629 100755 --- a/man/draws_rvars.Rd +++ b/man/draws_rvars.Rd @@ -77,10 +77,10 @@ str(x2) } \seealso{ Other formats: +\code{\link{draws}}, \code{\link{draws_array}()}, \code{\link{draws_df}()}, \code{\link{draws_list}()}, -\code{\link{draws_matrix}()}, -\code{\link{draws}} +\code{\link{draws_matrix}()} } \concept{formats} diff --git a/man/ess_basic.Rd b/man/ess_basic.Rd index 867076ca..47aeec0e 100755 --- a/man/ess_basic.Rd +++ b/man/ess_basic.Rd @@ -81,9 +81,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/ess_bulk.Rd b/man/ess_bulk.Rd index c1456be3..8baada5c 100755 --- a/man/ess_bulk.Rd +++ b/man/ess_bulk.Rd @@ -74,9 +74,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/ess_quantile.Rd b/man/ess_quantile.Rd index aa85c909..f919ad60 100755 --- a/man/ess_quantile.Rd +++ b/man/ess_quantile.Rd @@ -83,9 +83,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/ess_sd.Rd b/man/ess_sd.Rd index 38475d2a..91278bf8 100755 --- a/man/ess_sd.Rd +++ b/man/ess_sd.Rd @@ -68,9 +68,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/ess_tail.Rd b/man/ess_tail.Rd index 8f959718..36b2772c 100755 --- a/man/ess_tail.Rd +++ b/man/ess_tail.Rd @@ -74,9 +74,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/extract_variable_array.Rd b/man/extract_variable_array.Rd index 348a1a74..2c1eae76 100644 --- a/man/extract_variable_array.Rd +++ b/man/extract_variable_array.Rd @@ -45,7 +45,7 @@ str(Sigma) } \seealso{ Other variable extraction methods: -\code{\link{extract_variable_matrix}()}, -\code{\link{extract_variable}()} +\code{\link{extract_variable}()}, +\code{\link{extract_variable_matrix}()} } \concept{variable extraction methods} diff --git a/man/extract_variable_matrix.Rd b/man/extract_variable_matrix.Rd index 1b9c97c1..dedb49c1 100644 --- a/man/extract_variable_matrix.Rd +++ b/man/extract_variable_matrix.Rd @@ -47,7 +47,7 @@ rhat(mu) } \seealso{ Other variable extraction methods: -\code{\link{extract_variable_array}()}, -\code{\link{extract_variable}()} +\code{\link{extract_variable}()}, +\code{\link{extract_variable_array}()} } \concept{variable extraction methods} diff --git a/man/mcse_mean.Rd b/man/mcse_mean.Rd index c75935b1..65828211 100755 --- a/man/mcse_mean.Rd +++ b/man/mcse_mean.Rd @@ -65,9 +65,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/mcse_quantile.Rd b/man/mcse_quantile.Rd index 2d05f626..4651181e 100755 --- a/man/mcse_quantile.Rd +++ b/man/mcse_quantile.Rd @@ -80,9 +80,9 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/mcse_sd.Rd b/man/mcse_sd.Rd index 671ef249..02f86ebd 100755 --- a/man/mcse_sd.Rd +++ b/man/mcse_sd.Rd @@ -70,9 +70,9 @@ Other diagnostics: \code{\link{mcse_quantile}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd index 46370c49..49247293 100644 --- a/man/pareto_diags.Rd +++ b/man/pareto_diags.Rd @@ -150,9 +150,9 @@ Other diagnostics: \code{\link{mcse_quantile}()}, \code{\link{mcse_sd}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd index a4f91707..6234abf9 100644 --- a/man/pareto_khat.Rd +++ b/man/pareto_khat.Rd @@ -92,9 +92,9 @@ Other diagnostics: \code{\link{mcse_quantile}()}, \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, \code{\link{rhat_nested}()}, -\code{\link{rhat}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/rhat_basic.Rd b/man/rhat_basic.Rd index 16ffd332..762cce0f 100755 --- a/man/rhat_basic.Rd +++ b/man/rhat_basic.Rd @@ -77,8 +77,8 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, -\code{\link{rhat_nested}()}, \code{\link{rhat}()}, +\code{\link{rhat_nested}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/rhat_nested.Rd b/man/rhat_nested.Rd index f2536efd..9f91ad05 100644 --- a/man/rhat_nested.Rd +++ b/man/rhat_nested.Rd @@ -85,8 +85,8 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, -\code{\link{rhat_basic}()}, \code{\link{rhat}()}, +\code{\link{rhat_basic}()}, \code{\link{rstar}()} } \concept{diagnostics} diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd index 635e2c2e..cf96dfc6 100644 --- a/man/rollup_summary.Rd +++ b/man/rollup_summary.Rd @@ -127,16 +127,11 @@ rollup_summary(x, "mean", "min") # ... or use names to specify rollup functions for specific summaries rollup_summary(x, mean = "sd", median = "min") -\dontshow{if (getRversion() < "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # you can pass parameters to rollup functions using anonymous functions x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) -\dontshow{\}) # examplesIf} -\dontshow{if (getRversion() >= "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# you can pass parameters to rollup functions using anonymous functions -x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) -rollup_summary(x2, list(min = \(x) min(x, na.rm = TRUE))) +\dontshow{if (getRversion() >= "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # rollups can be chained to provide different rollup functions to # different variables x |> diff --git a/man/rstar.Rd b/man/rstar.Rd index c9479902..f2e74898 100644 --- a/man/rstar.Rd +++ b/man/rstar.Rd @@ -117,8 +117,8 @@ Other diagnostics: \code{\link{mcse_sd}()}, \code{\link{pareto_diags}()}, \code{\link{pareto_khat}()}, +\code{\link{rhat}()}, \code{\link{rhat_basic}()}, -\code{\link{rhat_nested}()}, -\code{\link{rhat}()} +\code{\link{rhat_nested}()} } \concept{diagnostics} diff --git a/tests/testthat/test-rollup_summary.R b/tests/testthat/test-rollup_summary.R index 4e87d6aa..a33bd75b 100644 --- a/tests/testthat/test-rollup_summary.R +++ b/tests/testthat/test-rollup_summary.R @@ -62,3 +62,39 @@ test_that("chaining rollups works", { expect_equal(rollup$rolled$sd_min, c(min(sum_x[startsWith(sum_x$variable, "mu"),]$sd), NA_real_)) expect_equal(rollup$rolled$sd_max, c(NA_real_, max(sum_x[startsWith(sum_x$variable, "Sigma"),]$sd))) }) + +test_that("rollup on draws-like object works", { + x <- as_draws_array(example_draws()) + expect_equal(rollup_summary(unclass(x)), rollup_summary(x)) +}) + +test_that("rollup on data frames works", { + x <- example_draws() + sum_x <- summarise_draws(x) + df_sum_x <- as.data.frame(sum_x) + df_sum_x$variable <- factor(df_sum_x$variable) + + expect_equal(rollup_summary(df_sum_x)$rolled, rollup_summary(sum_x)$rolled) +}) + +test_that("NULL rollup functions work", { + x <- example_draws() + + expect_equal( + as.data.frame(rollup_summary(x, .funs = NULL)$rolled), + data.frame(variable = "theta", dim = "8", stringsAsFactors = FALSE) + ) +}) + +test_that("printing works", { + x <- rollup_summary(example_draws()) + + for (color in c(TRUE, FALSE)) { + out <- capture.output(print(x, color = color)) + expect_match(out, "", fixed = TRUE, all = FALSE) + expect_match(out, "$unrolled", fixed = TRUE, all = FALSE) + expect_match(out, "variable +mean +median", all = FALSE) + expect_match(out, "$rolled", fixed = TRUE, all = FALSE) + expect_match(out, "variable +dim +mean_min +mean_max", all = FALSE) + } +}) diff --git a/tests/testthat/test-summarise_draws.R b/tests/testthat/test-summarise_draws.R index 6a823112..476dd61c 100644 --- a/tests/testthat/test-summarise_draws.R +++ b/tests/testthat/test-summarise_draws.R @@ -193,3 +193,20 @@ test_that("draws summaries can be converted to data frames", { expect_equal(as.data.frame(summarise_draws(draws_matrix, mean, quantile2)), ref) }) + +test_that("string summary functions in the posterior namespace can be found", { + expect_equal( + # execute in an environment where only summarise_draws() and example_draws() + # are available, but not ess_bulk(), so that summarise_draws() is explicitly + # forced to look in the posterior namespace for ess_bulk() + evalq( + summarise_draws(example_draws(), "ess_bulk"), + envir = list( + summarise_draws = summarise_draws, + example_draws = example_draws + ), + enclos = emptyenv() + ), + summarise_draws(example_draws(), ess_bulk) + ) +}) From 12d37655afb55f320e97a71d8ea535f0a025f8e0 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 12:58:15 -0600 Subject: [PATCH 7/9] check fixes for R 3 --- R/rollup_summary.R | 9 ++++----- man/rollup_summary.Rd | 10 ++++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 981cad64..6764d955 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -96,13 +96,12 @@ #' x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) #' rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) #' -#' @examplesIf getRversion() >= "4.1" #' # rollups can be chained to provide different rollup functions to #' # different variables -#' x |> -#' summarise_draws("mean", "sd") |> -#' rollup_summary(variable = "mu", sd = "min") |> -#' rollup_summary(variable = "Sigma", sd = "max") +#' ds <- summarise_draws(x, "mean", "sd") +#' rs <- rollup_summary(ds, variable = "mu", sd = "min") +#' rs <- rollup_summary(rs, variable = "Sigma", sd = "max") +#' rs #' @export rollup_summary <- function(.x, ...) { UseMethod("rollup_summary") diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd index cf96dfc6..ff19298d 100644 --- a/man/rollup_summary.Rd +++ b/man/rollup_summary.Rd @@ -131,12 +131,10 @@ rollup_summary(x, mean = "sd", median = "min") x2 <- draws_rvars(x = c(rvar_rng(rnorm, 5), NA)) rollup_summary(x2, list(min = function(x) min(x, na.rm = TRUE))) -\dontshow{if (getRversion() >= "4.1") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # rollups can be chained to provide different rollup functions to # different variables -x |> - summarise_draws("mean", "sd") |> - rollup_summary(variable = "mu", sd = "min") |> - rollup_summary(variable = "Sigma", sd = "max") -\dontshow{\}) # examplesIf} +ds <- summarise_draws(x, "mean", "sd") +rs <- rollup_summary(ds, variable = "mu", sd = "min") +rs <- rollup_summary(rs, variable = "Sigma", sd = "max") +rs } From c5e7f08b55cbe17cea1a9036613f490642d6d2f7 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 12:58:54 -0600 Subject: [PATCH 8/9] try removing cluster exports for windows to speed up summarise_draws --- R/summarise_draws.R | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/R/summarise_draws.R b/R/summarise_draws.R index 4052c3a4..34e579c7 100644 --- a/R/summarise_draws.R +++ b/R/summarise_draws.R @@ -164,23 +164,6 @@ summarise_draws.draws <- function( if (checkmate::test_os("windows")) { cl <- parallel::makePSOCKcluster(.cores) on.exit(parallel::stopCluster(cl)) - # exporting all these functions seems to be required to - # pass GitHub actions checks on Windows - parallel::clusterExport( - cl, - varlist = package_function_names("posterior"), - envir = as.environment(asNamespace("posterior")) - ) - parallel::clusterExport( - cl, - varlist = package_function_names("checkmate"), - envir = as.environment(asNamespace("checkmate")) - ) - parallel::clusterExport( - cl, - varlist = package_function_names("rlang"), - envir = as.environment(asNamespace("rlang")) - ) summary_list <- parallel::parLapply( cl, X = chunk_list, From 16c7a393e4fcab2240e01b0c90ee3be08126d305 Mon Sep 17 00:00:00 2001 From: Matthew Kay Date: Fri, 2 Feb 2024 13:46:44 -0600 Subject: [PATCH 9/9] more sensible precedence for overriding default rollup functions --- R/rollup_summary.R | 15 +++++++-------- man/rollup_summary.Rd | 5 ++--- tests/testthat/test-rollup_summary.R | 10 ++++++++++ 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/R/rollup_summary.R b/R/rollup_summary.R index 6764d955..80afa78d 100644 --- a/R/rollup_summary.R +++ b/R/rollup_summary.R @@ -23,8 +23,7 @@ #' - a named list of any of the above. #' #' Unnamed arguments in `...` specify default rollup functions to apply to all -#' summary measures that do not have specific rollup functions given in `...` or -#' `.funs`. +#' summary measures that do not have specific rollup functions given in `...`. #' @param variable (character vector) base names (without indices) of variables #' to roll up. If `NULL` (the default), all variables with indices in their names #' (e.g. `"x[1,2]"`) will be rolled up. @@ -32,7 +31,7 @@ #' and values are the default rollup functions to apply to those summary #' measures, unless overridden by `...`. As in `...`, unnamed elements of this #' list give default rollup functions to apply to summary measures that do not -#' have specific rollup functions given in `...` or `.funs`. +#' have specific rollup functions given in `.funs`. #' @details #' If called without specifying additional rollup functions in `...`, #' `rollup_summary()` will apply the default rollup functions as determined by @@ -145,12 +144,12 @@ rollup_summary.data.frame <- function ( is_unnamed <- rlang::names2(default_rollup_funs) == "" unspecified_rollup_funs <- do.call(c, default_rollup_funs[is_unnamed]) default_rollup_funs <- default_rollup_funs[!is_unnamed] - } - # apply the measure-specific default rollup functions to any columns not - # overridden by the user - missing_default_funs <- setdiff(names(default_rollup_funs), names(rollup_funs)) - rollup_funs[missing_default_funs] <- default_rollup_funs[missing_default_funs] + # apply the measure-specific default rollup functions to any columns not + # overridden by the user + missing_default_funs <- setdiff(names(default_rollup_funs), names(rollup_funs)) + rollup_funs[missing_default_funs] <- default_rollup_funs[missing_default_funs] + } # apply the generic default rollup functions to any remaining unspecified columns rollup_funs[setdiff(names(.x), names(rollup_funs))] <- list(unspecified_rollup_funs) diff --git a/man/rollup_summary.Rd b/man/rollup_summary.Rd index ff19298d..e9c0d864 100644 --- a/man/rollup_summary.Rd +++ b/man/rollup_summary.Rd @@ -45,8 +45,7 @@ to apply to that summary measure, specified as one of: } Unnamed arguments in \code{...} specify default rollup functions to apply to all -summary measures that do not have specific rollup functions given in \code{...} or -\code{.funs}.} +summary measures that do not have specific rollup functions given in \code{...}.} \item{variable}{(character vector) base names (without indices) of variables to roll up. If \code{NULL} (the default), all variables with indices in their names @@ -56,7 +55,7 @@ to roll up. If \code{NULL} (the default), all variables with indices in their na and values are the default rollup functions to apply to those summary measures, unless overridden by \code{...}. As in \code{...}, unnamed elements of this list give default rollup functions to apply to summary measures that do not -have specific rollup functions given in \code{...} or \code{.funs}.} +have specific rollup functions given in \code{.funs}.} } \value{ A \code{rollup_summary} object, which is a named list of \code{\link{draws_summary}} objects: diff --git a/tests/testthat/test-rollup_summary.R b/tests/testthat/test-rollup_summary.R index a33bd75b..f1d8b69b 100644 --- a/tests/testthat/test-rollup_summary.R +++ b/tests/testthat/test-rollup_summary.R @@ -86,6 +86,16 @@ test_that("NULL rollup functions work", { ) }) +test_that("unnamed rollups in `...` override measure-specific rollups in .funs", { + x <- example_draws() + ds <- summarise_draws(x, "mean", "rhat", "ess_bulk") + rollup <- rollup_summary(ds, "median") + expect_equal( + names(rollup$rolled), + c("variable", "dim", "mean_median", "rhat_median", "ess_bulk_median") + ) +}) + test_that("printing works", { x <- rollup_summary(example_draws())