diff --git a/DESCRIPTION b/DESCRIPTION index 2606c2637..2ef37cffd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,5 +90,5 @@ Additional_repositories: https://stan-dev.r-universe.dev/ LazyData: TRUE Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 VignetteBuilder: knitr, rmarkdown +Config/roxygen2/version: 8.0.0 diff --git a/R/methods.R b/R/methods.R index 57a810ddb..5a8f02326 100644 --- a/R/methods.R +++ b/R/methods.R @@ -19,7 +19,7 @@ #' the draws, while the aleatoric uncertainty from the data model is #' not included. However, the estimated means of both methods averaged #' across draws should be very similar. -#' +#' #' The [proj_linpred()] function draws from the projected posterior of the #' linear predictors, that is, draws before applying any link functions #' or other transformations. These linear predictors can also be @@ -639,6 +639,11 @@ proj_predict_aux <- function(proj, newdata, offsetnew, weightsnew, return(structure(pppd_out, cats = cats_aug)) } +# Needed to avoid a NOTE in `R CMD check`: +if (getRversion() >= package_version("2.15.1")) { + utils::globalVariables("fontsize") +} + #' Plot predictive performance #' #' This is the [plot()] method for `vsel` objects (returned by [varsel()] or @@ -1158,9 +1163,9 @@ plot.vsel <- function( x_color_txt <- "black" } pp <- pp + - if (packageVersion("ggplot2") < "4.0.0") { + if (utils::packageVersion("ggplot2") < "4.0.0") { geom_text(aes(y = -Inf, label = .data[["size_chr"]]), vjust = -0.5, - color = x_color_txt) + color = x_color_txt) } else { geom_text(aes(y = -Inf, label = .data[["size_chr"]], size = from_theme(fontsize * 0.7)), diff --git a/man/as.matrix.projection.Rd b/man/as.matrix.projection.Rd index b7316d98a..3c4645a91 100644 --- a/man/as.matrix.projection.Rd +++ b/man/as.matrix.projection.Rd @@ -41,7 +41,7 @@ nonconstant) weights for the projected draws, see } \details{ In case of the augmented-data projection for a multilevel submodel -of a \code{\link[brms:brmsfamily]{brms::categorical()}} reference model, the multilevel parameters (and +of a \code{\link[brms:categorical]{brms::categorical()}} reference model, the multilevel parameters (and therefore also their names) slightly differ from those in the \pkg{brms} reference model fit (see section "Augmented-data projection" in \code{\link[=extend_family]{extend_family()}}'s documentation). diff --git a/man/as_draws_matrix.projection.Rd b/man/as_draws_matrix.projection.Rd index 83001dcbe..774fe05b5 100644 --- a/man/as_draws_matrix.projection.Rd +++ b/man/as_draws_matrix.projection.Rd @@ -25,7 +25,7 @@ An \eqn{S_{\mathrm{prj}} \times Q}{S_prj x Q} \code{draws_matrix} (see weights, \code{\link[posterior:weight_draws]{posterior::weight_draws()}} is applied internally. } \description{ -These are the \code{\link[posterior:draws]{posterior::as_draws()}} and \code{\link[posterior:draws_matrix]{posterior::as_draws_matrix()}} +These are the \code{\link[posterior:as_draws]{posterior::as_draws()}} and \code{\link[posterior:as_draws_matrix]{posterior::as_draws_matrix()}} methods for \code{projection} objects (returned by \code{\link[=project]{project()}}, possibly as elements of a \code{list}). They extract the projected parameter draws and return them as a \code{draws_matrix}. In case of different (i.e., nonconstant) weights @@ -36,7 +36,7 @@ these weights (safer in contrast to the matrix returned by } \details{ In case of the augmented-data projection for a multilevel submodel -of a \code{\link[brms:brmsfamily]{brms::categorical()}} reference model, the multilevel parameters (and +of a \code{\link[brms:categorical]{brms::categorical()}} reference model, the multilevel parameters (and therefore also their names) slightly differ from those in the \pkg{brms} reference model fit (see section "Augmented-data projection" in \code{\link[=extend_family]{extend_family()}}'s documentation). diff --git a/man/cl_agg.Rd b/man/cl_agg.Rd index 10ade5461..f4d34e2a3 100644 --- a/man/cl_agg.Rd +++ b/man/cl_agg.Rd @@ -17,7 +17,7 @@ cl_agg( \item{cl}{A numeric vector of length \eqn{S}, giving the cluster indices for the draws. The cluster indices need to be values from the set \eqn{\{1, - ..., S_{\mathrm{cl}}\}}{{1, ..., S_cl}}, except for draws that should be +..., S_{\mathrm{cl}}\}}{{1, ..., S_cl}}, except for draws that should be dropped (e.g., by thinning), in which case \code{NA} needs to be provided at the positions of \code{cl} corresponding to these draws.} diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd index 36e0a36b5..d26f8ee9c 100644 --- a/man/cv_varsel.Rd +++ b/man/cv_varsel.Rd @@ -142,7 +142,7 @@ those predictors have no cost and will therefore be selected first, whereas used for each predictor.} \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2, 3, - 4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same +4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same effect), indicating how much information (if any) to print out during the computations. Higher values indicate that more information should be printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose} @@ -298,9 +298,9 @@ whether the Pareto-\eqn{\hat{k}} diagnostics may result in warnings. See \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values and the warning thresholds. \pkg{projpred} does not support the usually recommended moment-matching (see \code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and -\code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), mixture importance sampling +\code{\link[brms:loo_moment_match]{brms::loo_moment_match()}}), mixture importance sampling (\code{vignette("loo2-mixis", package="loo")}), or \code{reloo}-ing -(\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO CV Pareto-\eqn{\hat{k}} +(\code{\link[brms:reloo]{brms::reloo()}}). If the reference model PSIS-LOO CV Pareto-\eqn{\hat{k}} values are good, but there are high Pareto-\eqn{\hat{k}} values for the projected models, you can try increasing the number of draws used for the PSIS-LOO CV (\code{ndraws} in case of \code{refit_prj = FALSE}; \code{ndraws_pred} in case @@ -335,7 +335,7 @@ the backends from packages \pkg{doParallel}, \pkg{doMPI}, or \pkg{doFuture}. For GLMs, this CV parallelization should work reliably, but for other models (such as GLMMs), it may lead to excessive memory usage which in turn may crash the R session (on Unix systems, setting an -appropriate memory limit via \code{\link[unix:rlimit]{unix::rlimit_as()}} may avoid crashing the +appropriate memory limit via \code{\link[unix:rlimit_as]{unix::rlimit_as()}} may avoid crashing the whole machine). However, the problem of excessive memory usage is less pronounced for the CV parallelization than for the projection parallelization described in \link{projpred-package}. In that regard, the CV diff --git a/man/df_binom.Rd b/man/df_binom.Rd index 44b4d3d1c..7f07f948b 100644 --- a/man/df_binom.Rd +++ b/man/df_binom.Rd @@ -15,7 +15,7 @@ A simulated classification dataset containing 100 observations. \url{https://web.stanford.edu/~hastie/glmnet/glmnetData/BNExample.RData} } \usage{ -df_binom +data(df_binom) } \description{ Binomial toy example diff --git a/man/df_gaussian.Rd b/man/df_gaussian.Rd index 5869bc1ae..e960f81e9 100644 --- a/man/df_gaussian.Rd +++ b/man/df_gaussian.Rd @@ -16,7 +16,7 @@ respectively.} \url{https://web.stanford.edu/~hastie/glmnet/glmnetData/QSExample.RData} } \usage{ -df_gaussian +data(df_gaussian) } \description{ Gaussian toy example diff --git a/man/extend_family.Rd b/man/extend_family.Rd index 47cbccc11..1dcd25c63 100644 --- a/man/extend_family.Rd +++ b/man/extend_family.Rd @@ -147,11 +147,11 @@ reference model is projected) currently have the following restrictions: constant with value 1). \item The thresholds are \code{"flexible"} (see \code{\link[brms:brmsfamily]{brms::brmsfamily()}}). \item The thresholds do not vary across the levels of a \code{factor}-like variable -(see argument \code{gr} of \code{\link[brms:addition-terms]{brms::resp_thres()}}). +(see argument \code{gr} of \code{\link[brms:resp_thres]{brms::resp_thres()}}). \item The \code{"probit_approx"} link is replaced by \code{"probit"}. } -For the \code{\link[brms:brmsfamily]{brms::categorical()}} family, be aware that: +For the \code{\link[brms:categorical]{brms::categorical()}} family, be aware that: \itemize{ \item For multilevel submodels, the group-level effects are allowed to be correlated between different response categories. diff --git a/man/mesquite.Rd b/man/mesquite.Rd index a0c653d1f..d289a6c2f 100644 --- a/man/mesquite.Rd +++ b/man/mesquite.Rd @@ -23,7 +23,7 @@ group).} \url{https://sites.stat.columbia.edu/gelman/arm/examples/mesquite/mesquite.dat} } \usage{ -mesquite +data(mesquite) } \description{ The mesquite bushes yields dataset from Gelman and Hill (2006) diff --git a/man/plot.cv_proportions.Rd b/man/plot.cv_proportions.Rd index 2f61f5c9f..33e21aecf 100644 --- a/man/plot.cv_proportions.Rd +++ b/man/plot.cv_proportions.Rd @@ -20,7 +20,7 @@ that \code{\link[=cv_proportions]{cv_proportions()}} will be applied to internally before then calling \code{\link[=plot.cv_proportions]{plot.cv_proportions()}}.} -\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element]{ggplot2::element_text()}} for +\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element_text]{ggplot2::element_text()}} for the y-axis tick labels. In case of long predictor names, \code{text_angle = 45} might be helpful (for example).} diff --git a/man/plot.vsel.Rd b/man/plot.vsel.Rd index 52f5de0c7..7bcf5e079 100644 --- a/man/plot.vsel.Rd +++ b/man/plot.vsel.Rd @@ -150,11 +150,11 @@ to be passed to \code{\link[=abbreviate]{abbreviate()}} in case of \code{ranking full-data predictor ranking and the corresponding ranking proportions are placed below the x-axis. By \code{"text"} or \code{"label"}, they are placed within the plotting area, using \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or -\code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}}, respectively. See also argument +\code{\link[ggrepel:geom_label_repel]{ggrepel::geom_label_repel()}}, respectively. See also argument \code{ranking_repel_args}.} \item{ranking_repel_args}{A \code{list} of arguments (except for \code{mapping}) to be -passed to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} in +passed to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or \code{\link[ggrepel:geom_label_repel]{ggrepel::geom_label_repel()}} in case of \code{ranking_repel = "text"} or \code{ranking_repel = "label"}, respectively.} @@ -177,9 +177,9 @@ or not (\code{FALSE}).} the ranking proportions given on the x-axis (below the full-data predictor ranking).} -\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element]{ggplot2::element_text()}} for +\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element_text]{ggplot2::element_text()}} for the x-axis tick labels. Note that the default of argument \code{angle} in -\code{\link[ggplot2:element]{ggplot2::element_text()}} is \code{NULL} (which implies no rotation) whereas we +\code{\link[ggplot2:element_text]{ggplot2::element_text()}} is \code{NULL} (which implies no rotation) whereas we use a default of \code{text_angle = 45} here. If \code{text_angle > 0} (\verb{< 0}), the x-axis text is automatically right-aligned (left-aligned). If \code{-90 < text_angle && text_angle < 90 && text_angle != 0}, the x-axis text is also top-aligned. When controlling \code{text_angle} via global option diff --git a/man/pred-projection.Rd b/man/pred-projection.Rd index 4a189e68b..687e3e4a8 100644 --- a/man/pred-projection.Rd +++ b/man/pred-projection.Rd @@ -245,7 +245,7 @@ In case of the latent projection and \code{transform = FALSE}: \itemize{ \item Output element \code{pred} contains the linear predictors without any modifications that may be due to the original response distribution (e.g., -for a \code{\link[brms:brmsfamily]{brms::cumulative()}} model, the ordered thresholds are not taken into +for a \code{\link[brms:cumulative]{brms::cumulative()}} model, the ordered thresholds are not taken into account). \item Output element \code{lpd} contains the \emph{latent} log predictive density values, i.e., those corresponding to the latent Gaussian distribution. If \code{newdata} diff --git a/man/predict.refmodel.Rd b/man/predict.refmodel.Rd index cde4d168f..f65d7bffe 100644 --- a/man/predict.refmodel.Rd +++ b/man/predict.refmodel.Rd @@ -50,7 +50,7 @@ spirit to argument \code{resp_oscale} from other functions: If (i) \code{is.null(ynew)}, then argument \code{type} affects the predictions as described above. In that case, note that \code{type = "link"} yields the linear predictors without any modifications that may be due to the original response -distribution (e.g., for a \code{\link[brms:brmsfamily]{brms::cumulative()}} model, the ordered +distribution (e.g., for a \code{\link[brms:cumulative]{brms::cumulative()}} model, the ordered thresholds are not taken into account). If (ii) \code{!is.null(ynew)}, then argument \code{type} also affects the scale of the log posterior predictive densities (\code{type = "response"} for the original response scale, \code{type = "link"} for the latent Gaussian scale).} diff --git a/man/project.Rd b/man/project.Rd index b8db71a7e..ccbccf35d 100644 --- a/man/project.Rd +++ b/man/project.Rd @@ -69,7 +69,7 @@ drawing new group-level effects when predicting from a multilevel submodel calculating output elements \code{dis} and \code{ce}.)} \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2\}}{{0, 1, - 2}} (if \code{!is.null(predictor_terms)}, \eqn{1} and \eqn{2} have the same +2}} (if \code{!is.null(predictor_terms)}, \eqn{1} and \eqn{2} have the same effect), indicating how much information (if any) to print out during the computations. Higher values indicate that more information should be printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose} diff --git a/man/projpred-package.Rd b/man/projpred-package.Rd index dd41d6673..ab5fc30a5 100644 --- a/man/projpred-package.Rd +++ b/man/projpred-package.Rd @@ -40,7 +40,7 @@ minimizers (in other words, these are the workhorse functions employed by \item Submodel without multilevel or additive terms: \itemize{ \item For the traditional (or latent) projection (or the augmented-data -projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:brmsfamily]{brms::bernoulli()}} family): An +projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:bernoulli]{brms::bernoulli()}} family): An internal C++ function which basically serves the same purpose as \code{\link[=lm]{lm()}} for the \code{\link[=gaussian]{gaussian()}} family and \code{\link[=glm]{glm()}} for all other families. The returned object inherits from class \code{subfit}. Possible tuning parameters @@ -58,21 +58,21 @@ value for the intercept at centered predictors; default: \code{0}), and \code{beta_init} (numeric vector giving the starting values for the regression coefficients; default: vector of \code{0}s). \item For the augmented-data projection: \code{\link[MASS:polr]{MASS::polr()}} (the returned object -inherits from class \code{polr}) for the \code{\link[brms:brmsfamily]{brms::cumulative()}} family or +inherits from class \code{polr}) for the \code{\link[brms:cumulative]{brms::cumulative()}} family or \code{\link[rstanarm:stan_polr]{rstanarm::stan_polr()}} fits, \code{\link[nnet:multinom]{nnet::multinom()}} (the returned object -inherits from class \code{multinom}) for the \code{\link[brms:brmsfamily]{brms::categorical()}} family. +inherits from class \code{multinom}) for the \code{\link[brms:categorical]{brms::categorical()}} family. } \item Submodel with multilevel but no additive terms: \itemize{ \item For the traditional (or latent) projection (or the augmented-data -projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:brmsfamily]{brms::bernoulli()}} family): +projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:bernoulli]{brms::bernoulli()}} family): \code{\link[lme4:lmer]{lme4::lmer()}} (the returned object inherits from class \code{lmerMod}) for the \code{\link[=gaussian]{gaussian()}} family, \code{\link[lme4:glmer]{lme4::glmer()}} (the returned object inherits from class \code{glmerMod}) for all other families. \item For the augmented-data projection: \code{\link[ordinal:clmm]{ordinal::clmm()}} (the returned -object inherits from class \code{clmm}) for the \code{\link[brms:brmsfamily]{brms::cumulative()}} family, +object inherits from class \code{clmm}) for the \code{\link[brms:cumulative]{brms::cumulative()}} family, \code{\link[mclogit:mblogit]{mclogit::mblogit()}} (the returned object inherits from class \code{mmblogit}) -for the \code{\link[brms:brmsfamily]{brms::categorical()}} family. +for the \code{\link[brms:categorical]{brms::categorical()}} family. } \item Submodel without multilevel but additive terms: \code{\link[mgcv:gam]{mgcv::gam()}} (the returned object inherits from class \code{gam}). @@ -119,7 +119,7 @@ projection if the submodel has no multilevel or additive predictor terms), but for all other types of submodels, the fitted submodel objects are quite big, which---when running in parallel---may lead to excessive memory usage which in turn may crash the R session (on Unix systems, setting an -appropriate memory limit via \code{\link[unix:rlimit]{unix::rlimit_as()}} may avoid crashing the whole +appropriate memory limit via \code{\link[unix:rlimit_as]{unix::rlimit_as()}} may avoid crashing the whole machine). Thus, we currently cannot recommend parallelizing projections onto submodels which are GLMs (in this context, the latent projection onto a submodel without multilevel and without additive terms may be regarded as a @@ -238,6 +238,7 @@ Useful links: Authors: \itemize{ + \item Osvaldo Martin \email{aloctavodia@gmail.com} \item Juho Piironen \email{juho.t.piironen@gmail.com} \item Markus Paasiniemi \item Alejandro Catalina \email{alecatfel@gmail.com} diff --git a/man/refmodel-init-get.Rd b/man/refmodel-init-get.Rd index ca562cb25..5e7cd663e 100644 --- a/man/refmodel-init-get.Rd +++ b/man/refmodel-init-get.Rd @@ -359,7 +359,7 @@ For the augmented-data projection, the response vector resulting from Note that response-specific offsets (i.e., one length-\eqn{N} offset vector per response category) are not supported by \pkg{projpred} yet. So far, only offsets which are the same across all response categories are supported. This -is why in case of the \code{\link[brms:brmsfamily]{brms::categorical()}} family, offsets are currently not +is why in case of the \code{\link[brms:categorical]{brms::categorical()}} family, offsets are currently not supported at all. Currently, \code{object = NULL} (i.e., a \code{datafit}; see section "Value") is not diff --git a/man/varsel.Rd b/man/varsel.Rd index 73786e44f..cac025862 100644 --- a/man/varsel.Rd +++ b/man/varsel.Rd @@ -92,7 +92,7 @@ supplied). Note that \code{nterms_max} does not count the intercept, so use does not count the intercept.)} \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2, 3, - 4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same +4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same effect), indicating how much information (if any) to print out during the computations. Higher values indicate that more information should be printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose} diff --git a/tests/testthat/test_datafit.R b/tests/testthat/test_datafit.R index d6e32df7c..4f56d499f 100644 --- a/tests/testthat/test_datafit.R +++ b/tests/testthat/test_datafit.R @@ -695,10 +695,17 @@ test_that(paste( nterms = 0:nterms, refit_prj = FALSE) # compute the results for the Lasso - lasso <- glmnet::glmnet(x, y_glmnet, - family = fam$family, weights = weights, - lambda.min.ratio = lambda_min_ratio, - nlambda = nlambda, thresh = 1e-12) + if (packageVersion("glmnet") < "5.0") { + lasso <- glmnet::glmnet(x, y_glmnet, + family = fam$family, weights = weights, + lambda.min.ratio = lambda_min_ratio, + nlambda = nlambda, thresh = 1e-12) + } else { + lasso <- glmnet::glmnet(x, y_glmnet, + family = fam$family, weights = weights, + lambda.min.ratio = lambda_min_ratio, + nlambda = nlambda, control = list(thresh = 1e-12)) + } predictor_ranking <- predict(lasso, type = "nonzero", s = lasso$lambda) nselected <- sapply(predictor_ranking, function(e) length(e)) lambdainds <- sapply(unique(nselected), function(nterms) { diff --git a/tests/testthat/test_glm_elnet.R b/tests/testthat/test_glm_elnet.R index 084d601c6..f720fcec9 100644 --- a/tests/testthat/test_glm_elnet.R +++ b/tests/testthat/test_glm_elnet.R @@ -87,14 +87,26 @@ test_that(paste( normalize = normalize, thresh = 1e-12, intercept = intercept ) - fit2 <- glmnet::glmnet(x, y_glmnet, - family = fam$family, alpha = alpha, - lambda.min.ratio = lambda_min_ratio, - nlambda = nlam, - weights = w, offset = os, - standardize = normalize, - thresh = 1e-12, intercept = intercept - ) + if (packageVersion("glmnet") < "5.0") { + fit2 <- glmnet::glmnet(x, y_glmnet, + family = fam$family, alpha = alpha, + lambda.min.ratio = lambda_min_ratio, + nlambda = nlam, + weights = w, offset = os, + standardize = normalize, + thresh = 1e-12, intercept = intercept + ) + } else { + fit2 <- glmnet::glmnet(x, y_glmnet, + family = fam$family, alpha = alpha, + lambda.min.ratio = lambda_min_ratio, + nlambda = nlam, + weights = w, offset = os, + standardize = normalize, + control = list(thresh = 1e-12), + intercept = intercept + ) + } ## check that with a given L1-norm, the coefficient values are the ## same (need to check it this way since the lambda values are not ## comparable between glm_elnet and glmnet)