From a43580f139229a1338b5735906b17d0876f179a2 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 12 Apr 2024 18:59:46 +0300 Subject: [PATCH 01/22] Don't warn about slightly large khats --- R/cv_varsel.R | 69 ++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 48 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index ef1e4416a..b18669929 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -612,13 +612,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # recommend K-fold CV (for the reference model refits, i.e., brms's `reloo` # argument, another reason is that they can quickly become as costly as # K-fold CV). - warn_pareto( - n07 = sum(pareto_k > 0.7), n05 = sum(0.7 >= pareto_k & pareto_k > 0.5), - warn_txt_start = paste0("In the calculation of the reference model's ", - "PSIS-LOO CV weights, "), - warn_txt_mid_common = paste0(" (out of ", n, ") Pareto k-values are "), - warn_txt_end = paste0( - ". Moment matching (see the loo package), mixture importance ", + warn_pareto(n07 = sum(pareto_k > 0.7), n = n, + warn_txt = paste0("Some Pareto k's for the reference model's ", + "PSIS-LOO-CV are >0.7 (%d / %d).", + "\n\nMoment matching (see the loo package), mixture importance ", "sampling (see the loo package), and `reloo`-ing (see the brms package) ", "are not supported by projpred. If these techniques (run outside of ", "projpred, i.e., for the reference model only; note that `reloo`-ing ", @@ -644,18 +641,14 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, log_ratios = -loglik_forPSIS ) # The k-values are h-specific (expectation-specific) here (see Vehtari et - # al., 2022, , beginning of + # al., 2024, , beginning of # section 3, section 3.2.8, appendix D, and appendix E). - warn_pareto( - n07 = sum(y_lat_E$pareto_k > 0.7), - n05 = sum(0.7 >= y_lat_E$pareto_k & y_lat_E$pareto_k > 0.5), - warn_txt_start = paste0("In the recalculation of the latent response ", - "values, "), - warn_txt_mid_common = paste0( - " (out of ", n, ") expectation-specific Pareto k-values are " - ), - warn_txt_end = ". In general, we recommend K-fold CV in this case." - ) + warn_pareto(n07 = sum(y_lat_E$pareto_k > 0.7), n = n, + warn_txt = paste0("In the recalculation of the latent response values, ", + "some expectation-specific Pareto k-values are >0.7 (%d / % d). ", + "In general, we recommend K-fold CV in this case." + ) + ) refmodel$y <- y_lat_E$value } @@ -825,20 +818,13 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, } if (importance_sampling_nm == "psis") { pareto_k_eval <- loo::pareto_k_values(sub_psisloo) - warn_pareto( - n07 = sum(pareto_k_eval > 0.7), - n05 = sum(0.7 >= pareto_k_eval & pareto_k_eval > 0.5), - warn_txt_start = paste0( - "In the recalculation of the reference model's PSIS-LOO CV ", - "weights for the performance evaluation (based on clustered or ", - "thinned posterior draws), " - ), - warn_txt_mid_common = paste0( - " (out of ", nloo, ") Pareto k-values are " - ), - warn_txt_end = paste0( - ". Watch out for warnings thrown by the original-draws Pareto ", - "smoothing to see whether it makes sense to increase the number ", + warn_pareto(n07 = sum(pareto_k_eval > 0.7), n = n, + warn_txt = paste0( + "Some Pareto k's for the reference model's ", + "PSIS-LOO-CV using clustered or ", + "thinned posterior draws are >0.7 (%d / %d), ", + ".\n\nCompare this to the original-draws Pareto k", + "diagnostics to see whether it makes sense to increase the number ", "of draws (resulting from the clustering or thinning for the ", "performance evaluation). Alternatively, K-fold CV can be used." ) @@ -1190,22 +1176,9 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, return(out_list) } -warn_pareto <- function(n07, n05, warn_txt_start, warn_txt_mid_common, - warn_txt_end) { - if (!getOption("projpred.warn_psis", TRUE) || (n07 == 0 && n05 == 0)) return() - if (n07 > 0) { - warn_txt_mid <- paste0(n07, warn_txt_mid_common, "> 0.7") - if (n05 > 0) { - warn_txt_mid <- paste0(warn_txt_mid, " and ") - } - } else { - warn_txt_mid <- "" - } - if (n05 > 0) { - warn_txt_mid <- paste0(warn_txt_mid, n05, warn_txt_mid_common, "in the ", - "interval (0.5, 0.7]") - } - warning(warn_txt_start, warn_txt_mid, warn_txt_end) +warn_pareto <- function(n07, n, warn_txt) { + if (!getOption("projpred.warn_psis", TRUE) || (n07 == 0)) return() + warning(sprintf(warn_txt, n07, n)) return() } From e17599fd2fce5d386b13eff418ca6ab2437e6554 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 12 Apr 2024 21:37:40 +0300 Subject: [PATCH 02/22] fix some spelling --- R/cv_varsel.R | 96 ++++++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index b18669929..d8f34c76c 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -16,13 +16,13 @@ #' #' @inheritParams varsel #' @param cv_method The CV method, either `"LOO"` or `"kfold"`. In the `"LOO"` -#' case, a Pareto-smoothed importance sampling leave-one-out CV (PSIS-LOO CV) +#' case, a Pareto-smoothed importance sampling leave-one-out CV (PSIS-LOO-CV) #' is performed, which avoids refitting the reference model `nloo` times (in -#' contrast to a standard LOO CV). In the `"kfold"` case, a \eqn{K}-fold CV is +#' contrast to a standard LOO-CV). In the `"kfold"` case, a \eqn{K}-fold-CV is #' performed. See also section "Note" below. #' @param nloo **Caution:** Still experimental. Only relevant if `cv_method = #' "LOO"`. If `nloo` is smaller than the number of all observations, -#' approximate full LOO CV using probability-proportional-to-size-sampling +#' approximate full LOO-CV using probability-proportional-to-size-sampling #' (PPS) to make accurate computation only for `nloo` (anything from 1 to the #' number of all observations) leave-one-out folds (Magnusson et al., 2019). #' Smaller values lead to faster computation but higher uncertainty in the @@ -30,13 +30,13 @@ #' @param K Only relevant if `cv_method = "kfold"` and if `cvfits` is `NULL` #' (which is the case for reference model objects created by #' [get_refmodel.stanreg()] or [brms::get_refmodel.brmsfit()]). Number of -#' folds in \eqn{K}-fold CV. +#' folds in \eqn{K}-fold-CV. #' @param cvfits Only relevant if `cv_method = "kfold"`. The same as argument #' `cvfits` of [init_refmodel()], but repeated here so that output from #' [run_cvfun()] can be inserted here straightforwardly. #' @param validate_search A single logical value indicating whether to #' cross-validate also the search part, i.e., whether to run the search -#' separately for each CV fold (`TRUE`) or not (`FALSE`). We strongly do not +#' separately for each CV-fold (`TRUE`) or not (`FALSE`). We strongly do not #' recommend setting this to `FALSE`, because this is known to bias the #' predictive performance estimates of the selected submodels. However, #' setting this to `FALSE` can sometimes be useful because comparing the @@ -50,9 +50,9 @@ #' `NA`, then the PRNG state is reset (to the state before calling #' [cv_varsel()]) upon exiting [cv_varsel()]. Here, `seed` is used for #' clustering the reference model's posterior draws (if `!is.null(nclusters)` -#' or `!is.null(nclusters_pred)`), for subsampling PSIS-LOO CV folds (if +#' or `!is.null(nclusters_pred)`), for subsampling PSIS-LOO-CV folds (if #' `nloo` is smaller than the number of observations), for sampling the folds -#' in \eqn{K}-fold CV, and for drawing new group-level effects when predicting +#' in \eqn{K}-fold-CV, and for drawing new group-level effects when predicting #' from a multilevel submodel (however, not yet in case of a GAMM). #' @param parallel A single logical value indicating whether to run costly parts #' of the CV in parallel (`TRUE`) or not (`FALSE`). See also section "Note" @@ -70,7 +70,7 @@ #' @note If `validate_search` is `FALSE`, the search is not included in the CV #' so that only a single full-data search is run. #' -#' For PSIS-LOO CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, +#' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there #' was extreme autocorrelation between the MCMC iterations when the reference #' model was built. In those cases however, the reference model should not @@ -332,7 +332,7 @@ cv_varsel.refmodel <- function( if (!is.null(search_out) && validate_search) { # Extract the fold-wise predictor rankings (to avoid passing the large # object `search_out` itself) and coerce them to a `list` (in a row-wise - # manner) which is needed for the K-fold CV parallelization: + # manner) which is needed for the K-fold-CV parallelization: search_out_rks <- search_out[["rk_foldwise"]] if (!is.null(search_out_rks)) { n_folds <- nrow(search_out_rks) @@ -374,7 +374,7 @@ cv_varsel.refmodel <- function( # passing the large object `search_path_fulldata` to loo_varsel(): NULL } else { - # For K-fold CV, `validate_search = FALSE` may not be combined with + # For K-fold-CV, `validate_search = FALSE` may not be combined with # `refit_prj = FALSE`, so element `predictor_ranking` is all we need: search_path_fulldata["predictor_ranking"] }, @@ -494,7 +494,7 @@ parse_args_cv_varsel <- function(refmodel, cv_method, nloo, K, cvfits, if (!validate_search && !refit_prj) { # Not allowed because this would induce a dependency between training and # test data: - stop("For K-fold CV, `validate_search = FALSE` may not be combined with ", + stop("For K-fold-CV, `validate_search = FALSE` may not be combined with ", "`refit_prj = FALSE`.") } } else { @@ -504,7 +504,7 @@ parse_args_cv_varsel <- function(refmodel, cv_method, nloo, K, cvfits, stop("nloo must be at least 1") } else if (nloo < refmodel[["nobs"]] && getOption("projpred.warn_subsampled_loo", TRUE)) { - warning("Subsampled PSIS-LOO CV is still experimental.") + warning("Subsampled PSIS-LOO-CV is still experimental.") } } @@ -522,12 +522,12 @@ parse_args_cv_varsel <- function(refmodel, cv_method, nloo, K, cvfits, return(nlist(cv_method, nloo, K, cvfits)) } -# PSIS-LOO CV ------------------------------------------------------------- +# PSIS-LOO-CV ------------------------------------------------------------- -# Workhorse function for a variable selection with PSIS-LOO CV +# Workhorse function for a variable selection with PSIS-LOO-CV # # Argument `validate_search` indicates whether the search is performed -# separately for each LOO CV fold (i.e., separately for each observation). For +# separately for each LOO-CV fold (i.e., separately for each observation). For # all other arguments, see the documentation of cv_varsel(). loo_varsel <- function(refmodel, method, nterms_max, ndraws, nclusters, ndraws_pred, nclusters_pred, refit_prj, @@ -543,7 +543,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, "probabilistic model for which the log-likelihood can be evaluated.") } - # Log-likelihood values for the reference model (necessary for the PSIS-LOO CV + # Log-likelihood values for the reference model (necessary for the PSIS-LOO-CV # weights, but also for performance statistics like ELPD, MLPD, and GMPD): if (refmodel$family$for_latent) { mu_offs_oscale <- refmodel$family$latent_ilink( @@ -580,14 +580,14 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, } n <- ncol(loglik_forPSIS) - # PSIS-LOO CV weights: + # PSIS-LOO-CV weights: if (length(unique(refmodel$wdraws_ref)) != 1) { stop("Currently, projpred requires the reference model's posterior draws ", "to have constant weights.") } if (nrow(loglik_forPSIS) <= 1) { stop("Currently, more than one posterior draw from the reference model is ", - "needed (because projpred relies on loo::psis() for PSIS-LOO CV).") + "needed (because projpred relies on loo::psis() for PSIS-LOO-CV).") } # Call loo::psis() and while doing so, catch messages and warnings via # capt_mssgs_warns() to filter out some of them. @@ -609,23 +609,24 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # turn, i.e., brms's `reloo` argument) currently cannot be used because all # these techniques result in new MCMC draws for the reference model, meaning # that the projection would have to be adapted. Therefore, it is easier to - # recommend K-fold CV (for the reference model refits, i.e., brms's `reloo` + # recommend K-fold-CV (for the reference model refits, i.e., brms's `reloo` # argument, another reason is that they can quickly become as costly as - # K-fold CV). - warn_pareto(n07 = sum(pareto_k > 0.7), n = n, + # K-fold-CV). + warn_pareto(n07 = sum(pareto_k > 0.7), n = n, + khat_threshold = loo:::ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0("Some Pareto k's for the reference model's ", - "PSIS-LOO-CV are >0.7 (%d / %d).", - "\n\nMoment matching (see the loo package), mixture importance ", - "sampling (see the loo package), and `reloo`-ing (see the brms package) ", + "PSIS-LOO weights are >%s (%d / %d).", + "\n\nMoment matching (see the `loo` package), mixture importance ", + "sampling (see the loo package), and `reloo`-ing (see the `brms` package) ", "are not supported by projpred. If these techniques (run outside of ", "projpred, i.e., for the reference model only; note that `reloo`-ing ", "may be computationally costly) result in a markedly different ", - "reference model ELPD estimate than ordinary PSIS-LOO CV does, we ", - "recommend to use K-fold CV within projpred." + "reference model ELPD estimate than ordinary PSIS-LOO-CV does, we ", + "recommend to use K-fold-CV within projpred." ) ) lw <- weights(psisloo) - + if (refmodel$family$for_latent) { # Need to re-calculate the latent response values in `refmodel$y` by # incorporating the PSIS weights because `refmodel$y` resulted from applying @@ -644,9 +645,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # al., 2024, , beginning of # section 3, section 3.2.8, appendix D, and appendix E). warn_pareto(n07 = sum(y_lat_E$pareto_k > 0.7), n = n, + khat_threshold = loo:::ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0("In the recalculation of the latent response values, ", - "some expectation-specific Pareto k-values are >0.7 (%d / % d). ", - "In general, we recommend K-fold CV in this case." + "some expectation-specific Pareto k-values are >%s (%d / % d). ", + "In general, we recommend K-fold-CV in this case." ) ) refmodel$y <- y_lat_E$value @@ -706,7 +708,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, refdist_eval <- perf_eval_out[["p_ref"]] # Step 2: Weight the full-data performance evaluation results according to - # the PSIS-LOO CV weights. + # the PSIS-LOO-CV weights. if (refmodel$family$for_latent) { refdist_eval_mu_offs_oscale <- refmodel$family$latent_ilink( t(refdist_eval$mu_offs), cl_ref = refdist_eval$cl, @@ -763,14 +765,14 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (no_psis_eval) { if (getOption("projpred.warn_psis", TRUE)) { warning( - "In the recalculation of the reference model's PSIS-LOO CV ", + "In the recalculation of the reference model's PSIS-LOO-CV ", "weights for the performance evaluation, the number of draws ", "after clustering or thinning is too small for Pareto ", "smoothing. Using standard importance sampling (SIS) instead. ", "Watch out for warnings thrown by the original-draws Pareto ", "smoothing to see whether it makes sense to increase the number ", "of draws (resulting from the clustering or thinning for the ", - "performance evaluation). Alternatively, K-fold CV can be used." + "performance evaluation). Alternatively, K-fold-CV can be used." ) } # Use loo::sis(). @@ -816,17 +818,22 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (length(mssgs_warns_capt) > 0) { warning(mssgs_warns_capt) } + if (importance_sampling_nm == "psis") { pareto_k_eval <- loo::pareto_k_values(sub_psisloo) warn_pareto(n07 = sum(pareto_k_eval > 0.7), n = n, + khat_threshold = loo:::ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( "Some Pareto k's for the reference model's ", - "PSIS-LOO-CV using clustered or ", - "thinned posterior draws are >0.7 (%d / %d), ", - ".\n\nCompare this to the original-draws Pareto k", - "diagnostics to see whether it makes sense to increase the number ", + "PSIS-LOO weights for ", + ifelse(clust_used_eval, + paste0(nclusters_pred, " clustered "), + paste0(ndraws_pred, " posterior ")), + "draws are >%s (%d / %d)", + ".\n\nCompare this to the Pareto k -diagnostic with all draws ", + "to see whether it makes sense to increase the number ", "of draws (resulting from the clustering or thinning for the ", - "performance evaluation). Alternatively, K-fold CV can be used." + "performance evaluation). Alternatively, K-fold-CV can be used." ) ) } @@ -914,7 +921,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, } verb_out(verb_txt_start, verb_txt_mid, "the performance evaluation with ", "`refit_prj = ", refit_prj, "` for each of the N = ", nloo, " ", - "LOO CV folds separately ...") + "LOO-CV folds separately ...") } one_obs <- function(run_index, verbose_search = verbose && @@ -1176,13 +1183,14 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, return(out_list) } -warn_pareto <- function(n07, n, warn_txt) { +warn_pareto <- function(n07, n, khat_threshold=0.7, warn_txt) { if (!getOption("projpred.warn_psis", TRUE) || (n07 == 0)) return() - warning(sprintf(warn_txt, n07, n)) + warning(sprintf(warn_txt, as.character(round(khat_threshold,2)), n07, n), + call.=FALSE) return() } -# K-fold CV --------------------------------------------------------------- +# K-fold-CV --------------------------------------------------------------- # Needed to avoid a NOTE in `R CMD check`: if (getRversion() >= package_version("2.15.1")) { @@ -1406,7 +1414,7 @@ get_kfold <- function(refmodel, K, cvfits, verbose) { verb_out("-----", verbose = verbose) } else { stop("For a reference model which is not of class `datafit`, either ", - "`cvfits` or `cvfun` needs to be provided for K-fold CV (see ", + "`cvfits` or `cvfun` needs to be provided for K-fold-CV (see ", "`?init_refmodel`).") } } else { @@ -1497,7 +1505,7 @@ get_kfold <- function(refmodel, K, cvfits, verbose) { #' cvfits = cv_fits, nterms_max = 3, nclusters = 5, #' nclusters_pred = 10, seed = 5555) #' -#' # Stratified K-fold CV is straightforward: +#' # Stratified K-fold-CV is straightforward: #' n_strat <- 3L #' set.seed(692) #' # Some example strata: @@ -1556,7 +1564,7 @@ run_cvfun.refmodel <- function(object, return(structure(cvfits, folds = folds)) } -# PSIS-LOO CV helpers ----------------------------------------------------- +# PSIS-LOO-CV helpers ----------------------------------------------------- # ## decide which points to go through in the validation (i.e., which points # ## belong to the semi random subsample of validation points) From 67ec416797ddd690375b4f7877e8f3c96d5c59ce Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Thu, 25 Apr 2024 12:22:56 +0300 Subject: [PATCH 03/22] couple fixes --- R/cv_varsel.R | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index d8f34c76c..f266943ba 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -613,7 +613,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # argument, another reason is that they can quickly become as costly as # K-fold-CV). warn_pareto(n07 = sum(pareto_k > 0.7), n = n, - khat_threshold = loo:::ps_khat_threshold(dim(psisloo)[1]), + khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0("Some Pareto k's for the reference model's ", "PSIS-LOO weights are >%s (%d / %d).", "\n\nMoment matching (see the `loo` package), mixture importance ", @@ -642,10 +642,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, log_ratios = -loglik_forPSIS ) # The k-values are h-specific (expectation-specific) here (see Vehtari et - # al., 2024, , beginning of + # al., 2024, , beginning of # section 3, section 3.2.8, appendix D, and appendix E). warn_pareto(n07 = sum(y_lat_E$pareto_k > 0.7), n = n, - khat_threshold = loo:::ps_khat_threshold(dim(psisloo)[1]), + khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0("In the recalculation of the latent response values, ", "some expectation-specific Pareto k-values are >%s (%d / % d). ", "In general, we recommend K-fold-CV in this case." @@ -787,8 +787,8 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # Use loo::psis(). # Usually, we have a small number of projected draws here (400 by # default), which means that the 'loo' package will automatically - # perform the regularization from Vehtari et al. (2022, - # , appendix G). + # perform the regularization from Vehtari et al. (2024, + # , appendix G). importance_sampling_nm <- "psis" } } else { @@ -822,7 +822,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (importance_sampling_nm == "psis") { pareto_k_eval <- loo::pareto_k_values(sub_psisloo) warn_pareto(n07 = sum(pareto_k_eval > 0.7), n = n, - khat_threshold = loo:::ps_khat_threshold(dim(sub_psisloo)[1]), + khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( "Some Pareto k's for the reference model's ", "PSIS-LOO weights for ", @@ -1626,3 +1626,23 @@ loo_subsample_pps <- function(nloo, lppd) { return(nlist(inds, wcv)) } + +#' Pareto-smoothing k-hat threshold +#' +#' Copied from loo package. Remove after loo package exposes this. +#' +#' Given sample size S computes khat threshold for reliable Pareto +#' smoothed estimate (to have small probability of large error). See +#' section 3.2.4, equation (13). Sample sizes 100, 320, 1000, 2200, +#' 10000 correspond to thresholds 0.5, 0.6, 0.67, 0.7, 0.75. Although +#' with bigger sample size S we can achieve estimates with small +#' probability of large error, it is difficult to get accurate MCSE +#' estimates as the bias starts to dominate when k > 0.7 (see Section 3.2.3). +#' Thus the sample size dependend k-ht threshold is capped at 0.7. +#' @param S sample size +#' @param ... unused +#' @return threshold +#' @noRd +.ps_khat_threshold <- function(S, ...) { + min(1 - 1 / log10(S), 0.7) +} From 930ad816e8185a3fb1eb5ed1ec80f1f55ddf2e25 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Thu, 25 Apr 2024 21:32:58 +0200 Subject: [PATCH 04/22] make coding style consistent with that of the existing codebase (mainly determined by RStudio's coding and indentation style) --- R/cv_varsel.R | 63 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index f266943ba..3789e81be 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -612,21 +612,22 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # recommend K-fold-CV (for the reference model refits, i.e., brms's `reloo` # argument, another reason is that they can quickly become as costly as # K-fold-CV). - warn_pareto(n07 = sum(pareto_k > 0.7), n = n, - khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), - warn_txt = paste0("Some Pareto k's for the reference model's ", - "PSIS-LOO weights are >%s (%d / %d).", - "\n\nMoment matching (see the `loo` package), mixture importance ", - "sampling (see the loo package), and `reloo`-ing (see the `brms` package) ", - "are not supported by projpred. If these techniques (run outside of ", - "projpred, i.e., for the reference model only; note that `reloo`-ing ", - "may be computationally costly) result in a markedly different ", - "reference model ELPD estimate than ordinary PSIS-LOO-CV does, we ", - "recommend to use K-fold-CV within projpred." + warn_pareto( + n07 = sum(pareto_k > 0.7), n = n, + khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), + warn_txt = paste0( + "Some Pareto k's for the reference model's PSIS-LOO weights are ", + "> %s (%d / %d).\n\nMoment matching (see the `loo` package), mixture ", + "importance sampling (see the loo package), and `reloo`-ing (see the ", + "`brms` package) are not supported by projpred. If these techniques ", + "(run outside of projpred, i.e., for the reference model only; note ", + "that `reloo`-ing may be computationally costly) result in a markedly ", + "different reference model ELPD estimate than ordinary PSIS-LOO-CV ", + "does, we recommend to use K-fold-CV within projpred." ) ) lw <- weights(psisloo) - + if (refmodel$family$for_latent) { # Need to re-calculate the latent response values in `refmodel$y` by # incorporating the PSIS weights because `refmodel$y` resulted from applying @@ -644,13 +645,15 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # The k-values are h-specific (expectation-specific) here (see Vehtari et # al., 2024, , beginning of # section 3, section 3.2.8, appendix D, and appendix E). - warn_pareto(n07 = sum(y_lat_E$pareto_k > 0.7), n = n, - khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), - warn_txt = paste0("In the recalculation of the latent response values, ", - "some expectation-specific Pareto k-values are >%s (%d / % d). ", - "In general, we recommend K-fold-CV in this case." - ) + warn_pareto( + n07 = sum(y_lat_E$pareto_k > 0.7), n = n, + khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), + warn_txt = paste0( + "In the recalculation of the latent response values, some ", + "expectation-specific Pareto k-values are > %s (%d / % d). ", + "In general, we recommend K-fold-CV in this case." ) + ) refmodel$y <- y_lat_E$value } @@ -821,19 +824,19 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (importance_sampling_nm == "psis") { pareto_k_eval <- loo::pareto_k_values(sub_psisloo) - warn_pareto(n07 = sum(pareto_k_eval > 0.7), n = n, - khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), + warn_pareto( + n07 = sum(pareto_k_eval > 0.7), n = n, + khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( - "Some Pareto k's for the reference model's ", - "PSIS-LOO weights for ", + "Some Pareto k's for the reference model's PSIS-LOO weights for ", ifelse(clust_used_eval, paste0(nclusters_pred, " clustered "), paste0(ndraws_pred, " posterior ")), - "draws are >%s (%d / %d)", - ".\n\nCompare this to the Pareto k -diagnostic with all draws ", - "to see whether it makes sense to increase the number ", - "of draws (resulting from the clustering or thinning for the ", - "performance evaluation). Alternatively, K-fold-CV can be used." + "draws are > %s (%d / %d).\n\nCompare this to the ", + "Pareto k -diagnostic with all draws to see whether it makes ", + "sense to increase the number of draws (resulting from the ", + "clustering or thinning for the performance evaluation). ", + "Alternatively, K-fold-CV can be used." ) ) } @@ -1183,10 +1186,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, return(out_list) } -warn_pareto <- function(n07, n, khat_threshold=0.7, warn_txt) { +warn_pareto <- function(n07, n, khat_threshold = 0.7, warn_txt) { if (!getOption("projpred.warn_psis", TRUE) || (n07 == 0)) return() - warning(sprintf(warn_txt, as.character(round(khat_threshold,2)), n07, n), - call.=FALSE) + warning(sprintf(warn_txt, as.character(round(khat_threshold, 2)), n07, n), + call. = FALSE) return() } From d7b434df0d96f8e94be386463bf727fc81b20388 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Thu, 25 Apr 2024 21:56:23 +0200 Subject: [PATCH 05/22] re-roxygenize --- man/cv_varsel.Rd | 43 +++++++++++++++++++++---------------------- man/run_cvfun.Rd | 2 +- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index 55ecb679d..55e0f9b34 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -62,13 +62,13 @@ when refitting the submodels for the performance evaluation (if \code{refit_prj} is \code{TRUE}).} \item{cv_method}{The CV method, either \code{"LOO"} or \code{"kfold"}. In the \code{"LOO"} -case, a Pareto-smoothed importance sampling leave-one-out CV (PSIS-LOO CV) +case, a Pareto-smoothed importance sampling leave-one-out CV (PSIS-LOO-CV) is performed, which avoids refitting the reference model \code{nloo} times (in -contrast to a standard LOO CV). In the \code{"kfold"} case, a \eqn{K}-fold CV is +contrast to a standard LOO-CV). In the \code{"kfold"} case, a \eqn{K}-fold-CV is performed. See also section "Note" below.} \item{nloo}{\strong{Caution:} Still experimental. Only relevant if \code{cv_method = "LOO"}. If \code{nloo} is smaller than the number of all observations, -approximate full LOO CV using probability-proportional-to-size-sampling +approximate full LOO-CV using probability-proportional-to-size-sampling (PPS) to make accurate computation only for \code{nloo} (anything from 1 to the number of all observations) leave-one-out folds (Magnusson et al., 2019). Smaller values lead to faster computation but higher uncertainty in the @@ -77,7 +77,7 @@ evaluation part. If \code{NULL}, all observations are used (as by default).} \item{K}{Only relevant if \code{cv_method = "kfold"} and if \code{cvfits} is \code{NULL} (which is the case for reference model objects created by \code{\link[=get_refmodel.stanreg]{get_refmodel.stanreg()}} or \code{\link[brms:get_refmodel.brmsfit]{brms::get_refmodel.brmsfit()}}). Number of -folds in \eqn{K}-fold CV.} +folds in \eqn{K}-fold-CV.} \item{cvfits}{Only relevant if \code{cv_method = "kfold"}. The same as argument \code{cvfits} of \code{\link[=init_refmodel]{init_refmodel()}}, but repeated here so that output from @@ -85,7 +85,7 @@ folds in \eqn{K}-fold CV.} \item{validate_search}{A single logical value indicating whether to cross-validate also the search part, i.e., whether to run the search -separately for each CV fold (\code{TRUE}) or not (\code{FALSE}). We strongly do not +separately for each CV-fold (\code{TRUE}) or not (\code{FALSE}). We strongly do not recommend setting this to \code{FALSE}, because this is known to bias the predictive performance estimates of the selected submodels. However, setting this to \code{FALSE} can sometimes be useful because comparing the @@ -182,9 +182,9 @@ results can be obtained again if needed. Passed to argument \code{seed} of \code{NA}, then the PRNG state is reset (to the state before calling \code{\link[=cv_varsel]{cv_varsel()}}) upon exiting \code{\link[=cv_varsel]{cv_varsel()}}. Here, \code{seed} is used for clustering the reference model's posterior draws (if \code{!is.null(nclusters)} -or \code{!is.null(nclusters_pred)}), for subsampling PSIS-LOO CV folds (if +or \code{!is.null(nclusters_pred)}), for subsampling PSIS-LOO-CV folds (if \code{nloo} is smaller than the number of observations), for sampling the folds -in \eqn{K}-fold CV, and for drawing new group-level effects when predicting +in \eqn{K}-fold-CV, and for drawing new group-level effects when predicting from a multilevel submodel (however, not yet in case of a GAMM).} \item{search_terms}{Only relevant for forward search. A custom character @@ -277,7 +277,7 @@ that leads to the same results, \code{search_terms = c("x2", "x3")}). If \code{validate_search} is \code{FALSE}, the search is not included in the CV so that only a single full-data search is run. -For PSIS-LOO CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, +For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, \code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there was extreme autocorrelation between the MCMC iterations when the reference model was built. In those cases however, the reference model should not @@ -330,20 +330,19 @@ cvvs <- cv_varsel(fit, method = "L1", cv_method = "kfold", K = 2, \dontshow{\}) # examplesIf} } \references{ -Magnusson, Måns, Michael Andersen, Johan Jonasson, and Aki Vehtari. 2019. -"Bayesian Leave-One-Out Cross-Validation for Large Data." In \emph{Proceedings of -the 36th International Conference on Machine Learning}, edited by Kamalika -Chaudhuri and Ruslan Salakhutdinov, 97:4244--53. Proceedings of Machine -Learning Research. PMLR. -\url{https://proceedings.mlr.press/v97/magnusson19a.html}. - -Vehtari, Aki, Andrew Gelman, and Jonah Gabry. 2017. "Practical Bayesian Model -Evaluation Using Leave-One-Out Cross-Validation and WAIC." \emph{Statistics and -Computing} 27 (5): 1413--32. \doi{10.1007/s11222-016-9696-4}. - -Vehtari, Aki, Daniel Simpson, Andrew Gelman, Yuling Yao, and Jonah Gabry. -2022. "Pareto Smoothed Importance Sampling." arXiv. -\doi{10.48550/arXiv.1507.02646}. +Måns Magnusson, Michael Riis Andersen, Johan Jonasson, Aki Vehtari +(2020). Leave-one-out cross-validation for Bayesian model +comparison in large data. Proceedings of the 23rd International +Conference on Artificial Intelligence and Statistics (AISTATS), +PMLR 108:341-351. + +Aki Vehtari, Andrew Gelman, and Jonah Gabry (2017). Practical Bayesian Model +Evaluation Using Leave-One-Out Cross-Validation and WAIC. Statistics and +Computing, 27(5):1413--32. \doi{10.1007/s11222-016-9696-4}. + +Aki Vehtari, Daniel Simpson, Andrew Gelman, Yuling Yao, and Jonah +Gabry (2024). Pareto smoothed importance sampling. Journal of +Machine Learning Research, 25(72):1-58. } \seealso{ \code{\link[=varsel]{varsel()}} diff --git a/man/run_cvfun.Rd b/man/run_cvfun.Rd index 516837730..8c7715ab9 100644 --- a/man/run_cvfun.Rd +++ b/man/run_cvfun.Rd @@ -91,7 +91,7 @@ cvvs_fw <- cv_varsel(ref, method = "forward", cv_method = "kfold", cvfits = cv_fits, nterms_max = 3, nclusters = 5, nclusters_pred = 10, seed = 5555) -# Stratified K-fold CV is straightforward: +# Stratified K-fold-CV is straightforward: n_strat <- 3L set.seed(692) # Some example strata: From 9d489d2bca5c2ded0b6612be4467d893d6576ec2 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 12:01:16 +0300 Subject: [PATCH 06/22] improved doc and shortened warning messages --- R/cv_varsel.R | 72 ++++++++++++++++++++++++++------------------------- R/varsel.R | 7 ++--- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 3789e81be..62a3166c4 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -68,7 +68,13 @@ #' @inherit varsel details return #' #' @note If `validate_search` is `FALSE`, the search is not included in the CV -#' so that only a single full-data search is run. +#' so that only a single full-data search is run. If the number of observations +#' is big, the fast PSIS-LOO-CV along the full-data search path is likely +#' to be accurate. If the number of observations is small or moderate, +#' the fast PSIS-LOO-CV along the full-data search path is likely to have +#' optimistic bias in the middle of the search path. This result can be +#' used to guide further actions and the optimistic bias can be greatly +#' reduced by using `validate_search=TRUE`. #' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there @@ -77,13 +83,27 @@ #' have been used anyway, so we don't expect \pkg{projpred}'s `r_eff = NA` to #' be a problem. #' -#' PSIS cannot be used if the draws have different (i.e., nonconstant) weights -#' or if the number of draws is too small. In such cases, \pkg{projpred} -#' resorts to standard importance sampling (SIS) and throws a warning about -#' this. Throughout the documentation, the term "PSIS" is used even though in -#' fact, \pkg{projpred} resorts to SIS in these special cases. -#' -#' With `parallel = TRUE`, costly parts of \pkg{projpred}'s CV are run in +#' PSIS uses Pareto-$\hat{k}$ diagnostic to assess the reliability of PSIS-LOO-CV. +#' See [loo::loo-glossary] for how to interpret the Pareto-$\hat{k}$ values and +#' the warning thresholds. `projpred` package does not support the usually recommended +#' moment-matching ([loo::loo2-moment-matching]), mixture importance sampling +#' ([loo::loo2-mixis]), or `reloo`-ing ([brms::reloo()]). If the reference model +#' PSIS-LOO-CV Pareto-$\hat{k}$ values are good, but there are high Pareto-$\hat{k}$ +#' values for the projected models, you can try increasing the number of draws used +#' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the +#' number of draws does not help and if the reference model PSIS-LOO-CV +#' Pareto-$\hat{k}$ values are high, and the PSIS-LOO-CV results change substantially +#' when using moment-matching, mixture importance sampling, or `reloo`-ing, we +#' recommend to use $K$-fold-CV within `projpred`. +#' +#' PSIS cannot be used if the number of draws or clusters is too small. In such +#' cases, \pkg{projpred} resorts to standard importance sampling (SIS) and +#' shows a message about this. Throughout the documentation, the term "PSIS" is +#' used even though in fact, \pkg{projpred} resorts to SIS in these special cases. +#' If SIS is used, check that the reference model PSIS-LOO-CV Pareto-$\hat{k}$ +#' values are good. +#' +#' With `parallel = TRUE`, costly parts of \pkg{projpred}'s CV can be run in #' parallel. Costly parts are the fold-wise searches and performance #' evaluations in case of `validate_search = TRUE`. (Note that in case of #' \eqn{K}-fold CV, the \eqn{K} reference model refits are not affected by @@ -615,16 +635,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, warn_pareto( n07 = sum(pareto_k > 0.7), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), - warn_txt = paste0( - "Some Pareto k's for the reference model's PSIS-LOO weights are ", - "> %s (%d / %d).\n\nMoment matching (see the `loo` package), mixture ", - "importance sampling (see the loo package), and `reloo`-ing (see the ", - "`brms` package) are not supported by projpred. If these techniques ", - "(run outside of projpred, i.e., for the reference model only; note ", - "that `reloo`-ing may be computationally costly) result in a markedly ", - "different reference model ELPD estimate than ordinary PSIS-LOO-CV ", - "does, we recommend to use K-fold-CV within projpred." - ) + warn_txt = "Some (%d / %d) Pareto k's for the reference model's PSIS-LOO weights are > %s." ) lw <- weights(psisloo) @@ -649,8 +660,8 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(y_lat_E$pareto_k > 0.7), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0( - "In the recalculation of the latent response values, some ", - "expectation-specific Pareto k-values are > %s (%d / % d). ", + "In the recalculation of the latent response values, some (%d / % d)", + "expectation-specific Pareto k-values are > %s.\n", "In general, we recommend K-fold-CV in this case." ) ) @@ -768,14 +779,9 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (no_psis_eval) { if (getOption("projpred.warn_psis", TRUE)) { warning( - "In the recalculation of the reference model's PSIS-LOO-CV ", - "weights for the performance evaluation, the number of draws ", - "after clustering or thinning is too small for Pareto ", - "smoothing. Using standard importance sampling (SIS) instead. ", - "Watch out for warnings thrown by the original-draws Pareto ", - "smoothing to see whether it makes sense to increase the number ", - "of draws (resulting from the clustering or thinning for the ", - "performance evaluation). Alternatively, K-fold-CV can be used." + "Using standard importance sampling (SIS), as the number of draws", + "or clusters is too small for PSIS. For improved accuracy reliability", + "increase the number of draws or clusters, or use K-fold-CV." ) } # Use loo::sis(). @@ -828,15 +834,11 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(pareto_k_eval > 0.7), n = n, khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( - "Some Pareto k's for the reference model's PSIS-LOO weights for ", + "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO weights for ", ifelse(clust_used_eval, paste0(nclusters_pred, " clustered "), paste0(ndraws_pred, " posterior ")), - "draws are > %s (%d / %d).\n\nCompare this to the ", - "Pareto k -diagnostic with all draws to see whether it makes ", - "sense to increase the number of draws (resulting from the ", - "clustering or thinning for the performance evaluation). ", - "Alternatively, K-fold-CV can be used." + "draws are > %s." ) ) } @@ -1188,7 +1190,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, warn_pareto <- function(n07, n, khat_threshold = 0.7, warn_txt) { if (!getOption("projpred.warn_psis", TRUE) || (n07 == 0)) return() - warning(sprintf(warn_txt, as.character(round(khat_threshold, 2)), n07, n), + warning(sprintf(warn_txt, n07, n, as.character(round(khat_threshold, 2))), call. = FALSE) return() } diff --git a/R/varsel.R b/R/varsel.R index e80215f5b..b434a758c 100644 --- a/R/varsel.R +++ b/R/varsel.R @@ -5,7 +5,7 @@ #' known as solution path), i.e., the best submodel for each submodel size #' (number of predictor terms). The evaluation part determines the predictive #' performance of the submodels along the predictor ranking. A special method is -#' [varsel.vsel()] because it re-uses the search results from an earlier +#' [varsel.vsel()] which re-uses the search results from an earlier #' [varsel()] (or [cv_varsel()]) run, as illustrated in the main vignette. #' #' @param object An object of class `refmodel` (returned by [get_refmodel()] or @@ -141,8 +141,9 @@ #' #' L1 search is faster than forward search, but forward search may be more #' accurate. Furthermore, forward search may find a sparser model with -#' comparable performance to that found by L1 search, but it may also start -#' overfitting when more predictors are added. +#' comparable performance to that found by L1 search, but it may also +#' overfit when more predictors are added. This overfit can be detected +#' by running search validation (see [cv_varsel()]). #' #' An L1 search may select an interaction term before all involved lower-order #' interaction terms (including main-effect terms) have been selected. In From ecd876212ca60f6894d550de0f88e40b315651d1 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 12:03:49 +0300 Subject: [PATCH 07/22] fix the warning threshold --- R/cv_varsel.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 62a3166c4..95f54e461 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -633,7 +633,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # argument, another reason is that they can quickly become as costly as # K-fold-CV). warn_pareto( - n07 = sum(pareto_k > 0.7), n = n, + n07 = sum(pareto_k > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = "Some (%d / %d) Pareto k's for the reference model's PSIS-LOO weights are > %s." ) @@ -657,7 +657,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, # al., 2024, , beginning of # section 3, section 3.2.8, appendix D, and appendix E). warn_pareto( - n07 = sum(y_lat_E$pareto_k > 0.7), n = n, + n07 = sum(y_lat_E$pareto_k > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0( "In the recalculation of the latent response values, some (%d / % d)", @@ -831,7 +831,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (importance_sampling_nm == "psis") { pareto_k_eval <- loo::pareto_k_values(sub_psisloo) warn_pareto( - n07 = sum(pareto_k_eval > 0.7), n = n, + n07 = sum(pareto_k_eval > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO weights for ", From b19dfcd7db6f77b8bf53f63902492e70903662b2 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 12:25:21 +0300 Subject: [PATCH 08/22] fixes --- DESCRIPTION | 2 +- R/cv_varsel.R | 17 +++++++++-------- man/cv_varsel.Rd | 42 ++++++++++++++++++++++++++++++++---------- man/varsel.Rd | 7 ++++--- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 70d4cc510..8e2c511e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,5 +86,5 @@ Additional_repositories: https://mc-stan.org/r-packages/ LazyData: TRUE Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr, rmarkdown diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 95f54e461..c71ea81b1 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -83,16 +83,17 @@ #' have been used anyway, so we don't expect \pkg{projpred}'s `r_eff = NA` to #' be a problem. #' -#' PSIS uses Pareto-$\hat{k}$ diagnostic to assess the reliability of PSIS-LOO-CV. -#' See [loo::loo-glossary] for how to interpret the Pareto-$\hat{k}$ values and -#' the warning thresholds. `projpred` package does not support the usually recommended -#' moment-matching ([loo::loo2-moment-matching]), mixture importance sampling -#' ([loo::loo2-mixis]), or `reloo`-ing ([brms::reloo()]). If the reference model -#' PSIS-LOO-CV Pareto-$\hat{k}$ values are good, but there are high Pareto-$\hat{k}$ +#' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. +#' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and +#' the warning thresholds. \pkg{projpred} does not support the usually recommended +#' moment-matching (`vignette("loo2-moment-matching", package="loo")`), mixture +#' importance sampling (`vignette("loo2-mixis", package="loo")`), +#' or `reloo`-ing ([brms::reloo()]). If the reference model +#' PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} #' values for the projected models, you can try increasing the number of draws used #' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the #' number of draws does not help and if the reference model PSIS-LOO-CV -#' Pareto-$\hat{k}$ values are high, and the PSIS-LOO-CV results change substantially +#' Pareto-\eqn{\hat{k}} values are high, and the PSIS-LOO-CV results change substantially #' when using moment-matching, mixture importance sampling, or `reloo`-ing, we #' recommend to use $K$-fold-CV within `projpred`. #' @@ -100,7 +101,7 @@ #' cases, \pkg{projpred} resorts to standard importance sampling (SIS) and #' shows a message about this. Throughout the documentation, the term "PSIS" is #' used even though in fact, \pkg{projpred} resorts to SIS in these special cases. -#' If SIS is used, check that the reference model PSIS-LOO-CV Pareto-$\hat{k}$ +#' If SIS is used, check that the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} #' values are good. #' #' With `parallel = TRUE`, costly parts of \pkg{projpred}'s CV can be run in diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index 55e0f9b34..8765964a6 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -236,8 +236,9 @@ effect. L1 search is faster than forward search, but forward search may be more accurate. Furthermore, forward search may find a sparser model with -comparable performance to that found by L1 search, but it may also start -overfitting when more predictors are added. +comparable performance to that found by L1 search, but it may also +overfit when more predictors are added. This overfit can be detected +by running search validation (see \code{\link[=cv_varsel]{cv_varsel()}}). An L1 search may select an interaction term before all involved lower-order interaction terms (including main-effect terms) have been selected. In @@ -275,7 +276,13 @@ that leads to the same results, \code{search_terms = c("x2", "x3")}). } \note{ If \code{validate_search} is \code{FALSE}, the search is not included in the CV -so that only a single full-data search is run. +so that only a single full-data search is run. If the number of observations +is big, the fast PSIS-LOO-CV along the full-data search path is likely +to be accurate. If the number of observations is small or moderate, +the fast PSIS-LOO-CV along the full-data search path is likely to have +optimistic bias in the middle of the search path. This result can be +used to guide further actions and the optimistic bias can be greatly +reduced by using \code{validate_search=TRUE}. For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, \code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there @@ -284,13 +291,28 @@ model was built. In those cases however, the reference model should not have been used anyway, so we don't expect \pkg{projpred}'s \code{r_eff = NA} to be a problem. -PSIS cannot be used if the draws have different (i.e., nonconstant) weights -or if the number of draws is too small. In such cases, \pkg{projpred} -resorts to standard importance sampling (SIS) and throws a warning about -this. Throughout the documentation, the term "PSIS" is used even though in -fact, \pkg{projpred} resorts to SIS in these special cases. - -With \code{parallel = TRUE}, costly parts of \pkg{projpred}'s CV are run in +PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. +See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values and +the warning thresholds. \pkg{projpred} does not support the usually recommended +moment-matching (\code{vignette("loo2-moment-matching", package="loo")}), mixture +importance sampling (\code{vignette("loo2-mixis", package="loo")}), +or \code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model +PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} +values for the projected models, you can try increasing the number of draws used +for the PSIS-LOO-CV (\code{ndraws_pred} with \code{refit_prj=TRUE}). If increasing the +number of draws does not help and if the reference model PSIS-LOO-CV +Pareto-\eqn{\hat{k}} values are high, and the PSIS-LOO-CV results change substantially +when using moment-matching, mixture importance sampling, or \code{reloo}-ing, we +recommend to use $K$-fold-CV within \code{projpred}. + +PSIS cannot be used if the number of draws or clusters is too small. In such +cases, \pkg{projpred} resorts to standard importance sampling (SIS) and +shows a message about this. Throughout the documentation, the term "PSIS" is +used even though in fact, \pkg{projpred} resorts to SIS in these special cases. +If SIS is used, check that the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} +values are good. + +With \code{parallel = TRUE}, costly parts of \pkg{projpred}'s CV can be run in parallel. Costly parts are the fold-wise searches and performance evaluations in case of \code{validate_search = TRUE}. (Note that in case of \eqn{K}-fold CV, the \eqn{K} reference model refits are not affected by diff --git a/man/varsel.Rd b/man/varsel.Rd index 2cac4afb4..b9ade6fbb 100644 --- a/man/varsel.Rd +++ b/man/varsel.Rd @@ -165,7 +165,7 @@ variable selection. The search part determines the predictor ranking (also known as solution path), i.e., the best submodel for each submodel size (number of predictor terms). The evaluation part determines the predictive performance of the submodels along the predictor ranking. A special method is -\code{\link[=varsel.vsel]{varsel.vsel()}} because it re-uses the search results from an earlier +\code{\link[=varsel.vsel]{varsel.vsel()}} which re-uses the search results from an earlier \code{\link[=varsel]{varsel()}} (or \code{\link[=cv_varsel]{cv_varsel()}}) run, as illustrated in the main vignette. } \details{ @@ -185,8 +185,9 @@ effect. L1 search is faster than forward search, but forward search may be more accurate. Furthermore, forward search may find a sparser model with -comparable performance to that found by L1 search, but it may also start -overfitting when more predictors are added. +comparable performance to that found by L1 search, but it may also +overfit when more predictors are added. This overfit can be detected +by running search validation (see \code{\link[=cv_varsel]{cv_varsel()}}). An L1 search may select an interaction term before all involved lower-order interaction terms (including main-effect terms) have been selected. In From 1877ea8429bca1c25ef12035815421ef03042d27 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 12:33:44 +0300 Subject: [PATCH 09/22] link moment matching functions --- R/cv_varsel.R | 8 ++++---- man/cv_varsel.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index c71ea81b1..491ce9970 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -86,10 +86,10 @@ #' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. #' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and #' the warning thresholds. \pkg{projpred} does not support the usually recommended -#' moment-matching (`vignette("loo2-moment-matching", package="loo")`), mixture -#' importance sampling (`vignette("loo2-mixis", package="loo")`), -#' or `reloo`-ing ([brms::reloo()]). If the reference model -#' PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} +#' moment-matching (see [loo::loo_moment_match()] and [brms::loo_moment_match()]), +#' mixture importance sampling (`vignette("loo2-mixis", package="loo")`), +#' or `reloo`-ing ([brms::reloo()]). If the reference model PSIS-LOO-CV +#' Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} #' values for the projected models, you can try increasing the number of draws used #' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the #' number of draws does not help and if the reference model PSIS-LOO-CV diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index 8765964a6..ef0ae1ece 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -294,10 +294,10 @@ be a problem. PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values and the warning thresholds. \pkg{projpred} does not support the usually recommended -moment-matching (\code{vignette("loo2-moment-matching", package="loo")}), mixture -importance sampling (\code{vignette("loo2-mixis", package="loo")}), -or \code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model -PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} +moment-matching (see \code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and \code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), +mixture importance sampling (\code{vignette("loo2-mixis", package="loo")}), +or \code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO-CV +Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} values for the projected models, you can try increasing the number of draws used for the PSIS-LOO-CV (\code{ndraws_pred} with \code{refit_prj=TRUE}). If increasing the number of draws does not help and if the reference model PSIS-LOO-CV From ffaebb4021ed75e7e76371e2717e6fe006e2b23e Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 14:56:25 +0300 Subject: [PATCH 10/22] move one paragraph in varsel doc --- R/cv_varsel.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 491ce9970..d8d458952 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -76,13 +76,6 @@ #' used to guide further actions and the optimistic bias can be greatly #' reduced by using `validate_search=TRUE`. #' -#' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, -#' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there -#' was extreme autocorrelation between the MCMC iterations when the reference -#' model was built. In those cases however, the reference model should not -#' have been used anyway, so we don't expect \pkg{projpred}'s `r_eff = NA` to -#' be a problem. -#' #' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. #' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and #' the warning thresholds. \pkg{projpred} does not support the usually recommended @@ -97,6 +90,13 @@ #' when using moment-matching, mixture importance sampling, or `reloo`-ing, we #' recommend to use $K$-fold-CV within `projpred`. #' +#' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, +#' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there +#' was extreme autocorrelation between the MCMC iterations when the reference +#' model was built. In those cases however, the reference model should not +#' have been used anyway, so we don't expect \pkg{projpred}'s `r_eff = NA` to +#' be a problem. +#' #' PSIS cannot be used if the number of draws or clusters is too small. In such #' cases, \pkg{projpred} resorts to standard importance sampling (SIS) and #' shows a message about this. Throughout the documentation, the term "PSIS" is From 35835b79ce2db22e92eee170f4b85d68f8b627f4 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 26 Apr 2024 15:22:44 +0300 Subject: [PATCH 11/22] mention projpred.warn_psis option --- R/cv_varsel.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index d8d458952..381593a38 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -77,6 +77,8 @@ #' reduced by using `validate_search=TRUE`. #' #' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. +#' Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as warnings, is controlled +#' with a global option `projpred.warn_psis` (default is FALSE). #' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and #' the warning thresholds. \pkg{projpred} does not support the usually recommended #' moment-matching (see [loo::loo_moment_match()] and [brms::loo_moment_match()]), From 2e8b0a4813b78288bf49aebdfee35ec51339623f Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 27 Apr 2024 21:20:44 +0200 Subject: [PATCH 12/22] fix default for global option `projpred.warn_psis` mentioned in the docs --- R/cv_varsel.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 381593a38..9be2ed641 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -78,7 +78,7 @@ #' #' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. #' Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as warnings, is controlled -#' with a global option `projpred.warn_psis` (default is FALSE). +#' with a global option `projpred.warn_psis` (default is `TRUE`). #' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and #' the warning thresholds. \pkg{projpred} does not support the usually recommended #' moment-matching (see [loo::loo_moment_match()] and [brms::loo_moment_match()]), @@ -91,7 +91,7 @@ #' Pareto-\eqn{\hat{k}} values are high, and the PSIS-LOO-CV results change substantially #' when using moment-matching, mixture importance sampling, or `reloo`-ing, we #' recommend to use $K$-fold-CV within `projpred`. -#' +#' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there #' was extreme autocorrelation between the MCMC iterations when the reference @@ -105,7 +105,7 @@ #' used even though in fact, \pkg{projpred} resorts to SIS in these special cases. #' If SIS is used, check that the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} #' values are good. -#' +#' #' With `parallel = TRUE`, costly parts of \pkg{projpred}'s CV can be run in #' parallel. Costly parts are the fold-wise searches and performance #' evaluations in case of `validate_search = TRUE`. (Note that in case of From 5355aa6e5fecde2845c793fd5a5c387904b7ab47 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Sun, 28 Apr 2024 14:57:07 +0300 Subject: [PATCH 13/22] minor edit --- R/cv_varsel.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 381593a38..3582ca1b1 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -88,9 +88,9 @@ #' values for the projected models, you can try increasing the number of draws used #' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the #' number of draws does not help and if the reference model PSIS-LOO-CV -#' Pareto-\eqn{\hat{k}} values are high, and the PSIS-LOO-CV results change substantially -#' when using moment-matching, mixture importance sampling, or `reloo`-ing, we -#' recommend to use $K$-fold-CV within `projpred`. +#' Pareto-\eqn{\hat{k}} values are high, and the there reference model PSIS-LOO-CV +#' results change substantially when using moment-matching, mixture importance +#' sampling, or `reloo`-ing, we recommend to use $K$-fold-CV within `projpred`. #' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there From 3a023f8cedb610d6f477108e248538706d9df5db Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sun, 28 Apr 2024 21:11:35 +0200 Subject: [PATCH 14/22] fix typo (docs) --- R/cv_varsel.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 03daa4f76..19ef0cd19 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -88,10 +88,10 @@ #' values for the projected models, you can try increasing the number of draws used #' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the #' number of draws does not help and if the reference model PSIS-LOO-CV -#' Pareto-\eqn{\hat{k}} values are high, and the there reference model PSIS-LOO-CV +#' Pareto-\eqn{\hat{k}} values are high, and the reference model PSIS-LOO-CV #' results change substantially when using moment-matching, mixture importance #' sampling, or `reloo`-ing, we recommend to use $K$-fold-CV within `projpred`. -#' +#' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there #' was extreme autocorrelation between the MCMC iterations when the reference From ac58c84f069a76fccb447f77a6f94861fad13575 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sun, 28 Apr 2024 21:12:29 +0200 Subject: [PATCH 15/22] re-roxygenize --- man/cv_varsel.Rd | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index ef0ae1ece..3d34ae60a 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -284,14 +284,9 @@ optimistic bias in the middle of the search path. This result can be used to guide further actions and the optimistic bias can be greatly reduced by using \code{validate_search=TRUE}. -For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, -\code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there -was extreme autocorrelation between the MCMC iterations when the reference -model was built. In those cases however, the reference model should not -have been used anyway, so we don't expect \pkg{projpred}'s \code{r_eff = NA} to -be a problem. - PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. +Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as warnings, is controlled +with a global option \code{projpred.warn_psis} (default is \code{TRUE}). See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values and the warning thresholds. \pkg{projpred} does not support the usually recommended moment-matching (see \code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and \code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), @@ -301,9 +296,16 @@ Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} values for the projected models, you can try increasing the number of draws used for the PSIS-LOO-CV (\code{ndraws_pred} with \code{refit_prj=TRUE}). If increasing the number of draws does not help and if the reference model PSIS-LOO-CV -Pareto-\eqn{\hat{k}} values are high, and the PSIS-LOO-CV results change substantially -when using moment-matching, mixture importance sampling, or \code{reloo}-ing, we -recommend to use $K$-fold-CV within \code{projpred}. +Pareto-\eqn{\hat{k}} values are high, and the reference model PSIS-LOO-CV +results change substantially when using moment-matching, mixture importance +sampling, or \code{reloo}-ing, we recommend to use $K$-fold-CV within \code{projpred}. + +For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, +\code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there +was extreme autocorrelation between the MCMC iterations when the reference +model was built. In those cases however, the reference model should not +have been used anyway, so we don't expect \pkg{projpred}'s \code{r_eff = NA} to +be a problem. PSIS cannot be used if the number of draws or clusters is too small. In such cases, \pkg{projpred} resorts to standard importance sampling (SIS) and From 2c88c91afa4076af8643696344ca4e71fef9a8e8 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 8 Jun 2024 07:43:02 +0200 Subject: [PATCH 16/22] fix whitespace in warning messages --- R/cv_varsel.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 19ef0cd19..757979f7d 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -663,7 +663,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(y_lat_E$pareto_k > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0( - "In the recalculation of the latent response values, some (%d / % d)", + "In the recalculation of the latent response values, some (%d / % d) ", "expectation-specific Pareto k-values are > %s.\n", "In general, we recommend K-fold-CV in this case." ) @@ -782,9 +782,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (no_psis_eval) { if (getOption("projpred.warn_psis", TRUE)) { warning( - "Using standard importance sampling (SIS), as the number of draws", - "or clusters is too small for PSIS. For improved accuracy reliability", - "increase the number of draws or clusters, or use K-fold-CV." + "Using standard importance sampling (SIS), as the number of ", + "draws or clusters is too small for PSIS. For improved accuracy ", + "reliability, increase the number of draws or clusters, or use ", + "K-fold-CV." ) } # Use loo::sis(). From 7dfc9d818f79f553f254a0a4f3f8e99bc249156c Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 8 Jun 2024 07:44:49 +0200 Subject: [PATCH 17/22] minor enhancements in the docs --- R/cv_varsel.R | 56 ++++++++++++++++++++++++---------------------- man/cv_varsel.Rd | 58 +++++++++++++++++++++++++----------------------- 2 files changed, 59 insertions(+), 55 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 757979f7d..b1595f50a 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -68,29 +68,31 @@ #' @inherit varsel details return #' #' @note If `validate_search` is `FALSE`, the search is not included in the CV -#' so that only a single full-data search is run. If the number of observations -#' is big, the fast PSIS-LOO-CV along the full-data search path is likely -#' to be accurate. If the number of observations is small or moderate, -#' the fast PSIS-LOO-CV along the full-data search path is likely to have -#' optimistic bias in the middle of the search path. This result can be +#' so that only a single full-data search is run. If the number of +#' observations is big, the fast PSIS-LOO-CV along the full-data search path +#' is likely to be accurate. If the number of observations is small or +#' moderate, the fast PSIS-LOO-CV along the full-data search path is likely to +#' have optimistic bias in the middle of the search path. This result can be #' used to guide further actions and the optimistic bias can be greatly -#' reduced by using `validate_search=TRUE`. +#' reduced by using `validate_search = TRUE`. #' -#' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. -#' Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as warnings, is controlled -#' with a global option `projpred.warn_psis` (default is `TRUE`). -#' See [loo::loo-glossary] for how to interpret the Pareto-\eqn{\hat{k}} values and -#' the warning thresholds. \pkg{projpred} does not support the usually recommended -#' moment-matching (see [loo::loo_moment_match()] and [brms::loo_moment_match()]), -#' mixture importance sampling (`vignette("loo2-mixis", package="loo")`), -#' or `reloo`-ing ([brms::reloo()]). If the reference model PSIS-LOO-CV -#' Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} -#' values for the projected models, you can try increasing the number of draws used -#' for the PSIS-LOO-CV (`ndraws_pred` with `refit_prj=TRUE`). If increasing the -#' number of draws does not help and if the reference model PSIS-LOO-CV -#' Pareto-\eqn{\hat{k}} values are high, and the reference model PSIS-LOO-CV -#' results change substantially when using moment-matching, mixture importance -#' sampling, or `reloo`-ing, we recommend to use $K$-fold-CV within `projpred`. +#' PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of +#' PSIS-LOO-CV. Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as +#' warnings, is controlled with a global option `projpred.warn_psis` (default +#' is `TRUE`). See [loo::loo-glossary] for how to interpret the +#' Pareto-\eqn{\hat{k}} values and the warning thresholds. \pkg{projpred} does +#' not support the usually recommended moment-matching (see +#' [loo::loo_moment_match()] and [brms::loo_moment_match()]), mixture +#' importance sampling (`vignette("loo2-mixis", package="loo")`), or +#' `reloo`-ing ([brms::reloo()]). If the reference model PSIS-LOO-CV +#' Pareto-\eqn{\hat{k}} values are good, but there are high +#' Pareto-\eqn{\hat{k}} values for the projected models, you can try +#' increasing the number of draws used for the PSIS-LOO-CV (`ndraws_pred` with +#' `refit_prj = TRUE`). If increasing the number of draws does not help and if +#' the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and +#' the reference model PSIS-LOO-CV results change substantially when using +#' moment-matching, mixture importance sampling, or `reloo`-ing, we recommend +#' to use \eqn{K}-fold-CV within `projpred`. #' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there @@ -99,12 +101,12 @@ #' have been used anyway, so we don't expect \pkg{projpred}'s `r_eff = NA` to #' be a problem. #' -#' PSIS cannot be used if the number of draws or clusters is too small. In such -#' cases, \pkg{projpred} resorts to standard importance sampling (SIS) and -#' shows a message about this. Throughout the documentation, the term "PSIS" is -#' used even though in fact, \pkg{projpred} resorts to SIS in these special cases. -#' If SIS is used, check that the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} -#' values are good. +#' PSIS cannot be used if the number of draws or clusters is too small. In +#' such cases, \pkg{projpred} resorts to standard importance sampling (SIS) +#' and shows a message about this. Throughout the documentation, the term +#' "PSIS" is used even though in fact, \pkg{projpred} resorts to SIS in these +#' special cases. If SIS is used, check that the reference model PSIS-LOO-CV +#' Pareto-\eqn{\hat{k}} values are good. #' #' With `parallel = TRUE`, costly parts of \pkg{projpred}'s CV can be run in #' parallel. Costly parts are the fold-wise searches and performance diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index 3d34ae60a..b0d8a0799 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -276,29 +276,31 @@ that leads to the same results, \code{search_terms = c("x2", "x3")}). } \note{ If \code{validate_search} is \code{FALSE}, the search is not included in the CV -so that only a single full-data search is run. If the number of observations -is big, the fast PSIS-LOO-CV along the full-data search path is likely -to be accurate. If the number of observations is small or moderate, -the fast PSIS-LOO-CV along the full-data search path is likely to have -optimistic bias in the middle of the search path. This result can be +so that only a single full-data search is run. If the number of +observations is big, the fast PSIS-LOO-CV along the full-data search path +is likely to be accurate. If the number of observations is small or +moderate, the fast PSIS-LOO-CV along the full-data search path is likely to +have optimistic bias in the middle of the search path. This result can be used to guide further actions and the optimistic bias can be greatly -reduced by using \code{validate_search=TRUE}. - -PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of PSIS-LOO-CV. -Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as warnings, is controlled -with a global option \code{projpred.warn_psis} (default is \code{TRUE}). -See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values and -the warning thresholds. \pkg{projpred} does not support the usually recommended -moment-matching (see \code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and \code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), -mixture importance sampling (\code{vignette("loo2-mixis", package="loo")}), -or \code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO-CV -Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} -values for the projected models, you can try increasing the number of draws used -for the PSIS-LOO-CV (\code{ndraws_pred} with \code{refit_prj=TRUE}). If increasing the -number of draws does not help and if the reference model PSIS-LOO-CV -Pareto-\eqn{\hat{k}} values are high, and the reference model PSIS-LOO-CV -results change substantially when using moment-matching, mixture importance -sampling, or \code{reloo}-ing, we recommend to use $K$-fold-CV within \code{projpred}. +reduced by using \code{validate_search = TRUE}. + +PSIS uses Pareto-\eqn{\hat{k}} diagnostic to assess the reliability of +PSIS-LOO-CV. Whether the Pareto-\eqn{\hat{k}} diagnostics are shown as +warnings, is controlled with a global option \code{projpred.warn_psis} (default +is \code{TRUE}). See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the +Pareto-\eqn{\hat{k}} values and the warning thresholds. \pkg{projpred} does +not support the usually recommended moment-matching (see +\code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and \code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), mixture +importance sampling (\code{vignette("loo2-mixis", package="loo")}), or +\code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO-CV +Pareto-\eqn{\hat{k}} values are good, but there are high +Pareto-\eqn{\hat{k}} values for the projected models, you can try +increasing the number of draws used for the PSIS-LOO-CV (\code{ndraws_pred} with +\code{refit_prj = TRUE}). If increasing the number of draws does not help and if +the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and +the reference model PSIS-LOO-CV results change substantially when using +moment-matching, mixture importance sampling, or \code{reloo}-ing, we recommend +to use \eqn{K}-fold-CV within \code{projpred}. For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, \code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there @@ -307,12 +309,12 @@ model was built. In those cases however, the reference model should not have been used anyway, so we don't expect \pkg{projpred}'s \code{r_eff = NA} to be a problem. -PSIS cannot be used if the number of draws or clusters is too small. In such -cases, \pkg{projpred} resorts to standard importance sampling (SIS) and -shows a message about this. Throughout the documentation, the term "PSIS" is -used even though in fact, \pkg{projpred} resorts to SIS in these special cases. -If SIS is used, check that the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} -values are good. +PSIS cannot be used if the number of draws or clusters is too small. In +such cases, \pkg{projpred} resorts to standard importance sampling (SIS) +and shows a message about this. Throughout the documentation, the term +"PSIS" is used even though in fact, \pkg{projpred} resorts to SIS in these +special cases. If SIS is used, check that the reference model PSIS-LOO-CV +Pareto-\eqn{\hat{k}} values are good. With \code{parallel = TRUE}, costly parts of \pkg{projpred}'s CV can be run in parallel. Costly parts are the fold-wise searches and performance From e68b9448907dcd256b12633af85af0a6b81e0dec Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 8 Jun 2024 11:36:25 +0200 Subject: [PATCH 18/22] minor fix in docs --- R/cv_varsel.R | 13 +++++++------ man/cv_varsel.Rd | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index b1595f50a..1efb22d20 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -87,12 +87,13 @@ #' `reloo`-ing ([brms::reloo()]). If the reference model PSIS-LOO-CV #' Pareto-\eqn{\hat{k}} values are good, but there are high #' Pareto-\eqn{\hat{k}} values for the projected models, you can try -#' increasing the number of draws used for the PSIS-LOO-CV (`ndraws_pred` with -#' `refit_prj = TRUE`). If increasing the number of draws does not help and if -#' the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and -#' the reference model PSIS-LOO-CV results change substantially when using -#' moment-matching, mixture importance sampling, or `reloo`-ing, we recommend -#' to use \eqn{K}-fold-CV within `projpred`. +#' increasing the number of draws used for the PSIS-LOO-CV (`ndraws` in case +#' of `refit_prj = FALSE`; `ndraws_pred` in case of `refit_prj = TRUE`). If +#' increasing the number of draws does not help and if the reference model +#' PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and the reference model +#' PSIS-LOO-CV results change substantially when using moment-matching, +#' mixture importance sampling, or `reloo`-ing, we recommend to use +#' \eqn{K}-fold-CV within `projpred`. #' #' For PSIS-LOO-CV, \pkg{projpred} calls [loo::psis()] (or, exceptionally, #' [loo::sis()], see below) with `r_eff = NA`. This is only a problem if there diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index b0d8a0799..cb01e1974 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -295,12 +295,13 @@ importance sampling (\code{vignette("loo2-mixis", package="loo")}), or \code{reloo}-ing (\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} values for the projected models, you can try -increasing the number of draws used for the PSIS-LOO-CV (\code{ndraws_pred} with -\code{refit_prj = TRUE}). If increasing the number of draws does not help and if -the reference model PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and -the reference model PSIS-LOO-CV results change substantially when using -moment-matching, mixture importance sampling, or \code{reloo}-ing, we recommend -to use \eqn{K}-fold-CV within \code{projpred}. +increasing the number of draws used for the PSIS-LOO-CV (\code{ndraws} in case +of \code{refit_prj = FALSE}; \code{ndraws_pred} in case of \code{refit_prj = TRUE}). If +increasing the number of draws does not help and if the reference model +PSIS-LOO-CV Pareto-\eqn{\hat{k}} values are high, and the reference model +PSIS-LOO-CV results change substantially when using moment-matching, +mixture importance sampling, or \code{reloo}-ing, we recommend to use +\eqn{K}-fold-CV within \code{projpred}. For PSIS-LOO-CV, \pkg{projpred} calls \code{\link[loo:psis]{loo::psis()}} (or, exceptionally, \code{\link[loo:sis]{loo::sis()}}, see below) with \code{r_eff = NA}. This is only a problem if there From 87331d6c57028b488ed999d57141ea41aba461d1 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Sat, 8 Jun 2024 12:02:38 +0200 Subject: [PATCH 19/22] add `NEWS.md` entry --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index dad6bfcc3..400383f95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ If you read this from a place other than ). This threshold depends on the Monte Carlo sample size and is often close to the former fixed threshold of 0.7 (a short introduction may also be found in the [LOO glossary](https://mc-stan.org/loo/reference/loo-glossary.html)). Correspondingly, the former "secondary" threshold of 0.5 is not used anymore either. # projpred 2.8.0 From 94408fdfbce6bf4ff239b91d4b2ebff4061f5898 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Mon, 10 Jun 2024 19:22:54 +0300 Subject: [PATCH 20/22] warning text fixes --- R/cv_varsel.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 03daa4f76..7ed3a42cd 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -783,7 +783,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (getOption("projpred.warn_psis", TRUE)) { warning( "Using standard importance sampling (SIS), as the number of draws", - "or clusters is too small for PSIS. For improved accuracy reliability", + "or clusters is too small for PSIS. For improved accuracy", "increase the number of draws or clusters, or use K-fold-CV." ) } @@ -837,7 +837,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(pareto_k_eval > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( - "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO weights for ", + "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO weights given ", ifelse(clust_used_eval, paste0(nclusters_pred, " clustered "), paste0(ndraws_pred, " posterior ")), From 569c6e49a204be5cd0c4b90c03fc37de9885ce73 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 10 Jun 2024 20:53:42 +0200 Subject: [PATCH 21/22] fix whitespace and indentation --- R/cv_varsel.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 8cffacae9..0fcb9042b 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -785,9 +785,10 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, if (no_psis_eval) { if (getOption("projpred.warn_psis", TRUE)) { warning( - "Using standard importance sampling (SIS), as the number of draws", - "or clusters is too small for PSIS. For improved accuracy", - "increase the number of draws or clusters, or use K-fold-CV." + "Using standard importance sampling (SIS), as the number of ", + "draws or clusters is too small for PSIS. For improved ", + "accuracy, increase the number of draws or clusters, or use ", + "K-fold-CV." ) } # Use loo::sis(). @@ -840,7 +841,8 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(pareto_k_eval > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( - "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO weights given ", + "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO ", + "weights given ", ifelse(clust_used_eval, paste0(nclusters_pred, " clustered "), paste0(ndraws_pred, " posterior ")), From 4e9d1c42627ea5d2d5816d988c098844cb5aff66 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 10 Jun 2024 20:58:09 +0200 Subject: [PATCH 22/22] minor fixes (`sprintf()`) --- R/cv_varsel.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index 0fcb9042b..472106aac 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -666,7 +666,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(y_lat_E$pareto_k > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(psisloo)[1]), warn_txt = paste0( - "In the recalculation of the latent response values, some (%d / % d) ", + "In the recalculation of the latent response values, some (%d / %d) ", "expectation-specific Pareto k-values are > %s.\n", "In general, we recommend K-fold-CV in this case." ) @@ -841,7 +841,7 @@ loo_varsel <- function(refmodel, method, nterms_max, ndraws, n07 = sum(pareto_k_eval > .ps_khat_threshold(dim(psisloo)[1])), n = n, khat_threshold = .ps_khat_threshold(dim(sub_psisloo)[1]), warn_txt = paste0( - "Some (%d / % d) Pareto k's for the reference model's PSIS-LOO ", + "Some (%d / %d) Pareto k's for the reference model's PSIS-LOO ", "weights given ", ifelse(clust_used_eval, paste0(nclusters_pred, " clustered "),