diff --git a/DESCRIPTION b/DESCRIPTION
index 2606c2637..2ef37cffd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -90,5 +90,5 @@ Additional_repositories:
     https://stan-dev.r-universe.dev/
 LazyData: TRUE
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.3
 VignetteBuilder: knitr, rmarkdown
+Config/roxygen2/version: 8.0.0
diff --git a/R/methods.R b/R/methods.R
index 57a810ddb..5a8f02326 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -19,7 +19,7 @@
 #' the draws, while the aleatoric uncertainty from the data model is
 #' not included. However, the estimated means of both methods averaged
 #' across draws should be very similar.
-#' 
+#'
 #' The [proj_linpred()] function draws from the projected posterior of the
 #' linear predictors, that is, draws before applying any link functions
 #' or other transformations. These linear predictors can also be
@@ -639,6 +639,11 @@ proj_predict_aux <- function(proj, newdata, offsetnew, weightsnew,
   return(structure(pppd_out, cats = cats_aug))
 }
 
+# Declare `fontsize` (see from_theme() below) to avoid an `R CMD check` NOTE:
+if (getRversion() >= package_version("2.15.1")) {
+  utils::globalVariables("fontsize")
+}
+
 #' Plot predictive performance
 #'
 #' This is the [plot()] method for `vsel` objects (returned by [varsel()] or
@@ -1158,9 +1163,9 @@ plot.vsel <- function(
       x_color_txt <- "black"
     }
     pp <- pp +
-      if (packageVersion("ggplot2") < "4.0.0") {
+      if (utils::packageVersion("ggplot2") < "4.0.0") {
         geom_text(aes(y = -Inf, label = .data[["size_chr"]]), vjust = -0.5,
-                        color = x_color_txt)
+                  color = x_color_txt)
       } else {
         geom_text(aes(y = -Inf, label = .data[["size_chr"]],
                       size = from_theme(fontsize * 0.7)),
diff --git a/man/as.matrix.projection.Rd b/man/as.matrix.projection.Rd
index b7316d98a..3c4645a91 100644
--- a/man/as.matrix.projection.Rd
+++ b/man/as.matrix.projection.Rd
@@ -41,7 +41,7 @@ nonconstant) weights for the projected draws, see
 }
 \details{
 In case of the augmented-data projection for a multilevel submodel
-of a \code{\link[brms:brmsfamily]{brms::categorical()}} reference model, the multilevel parameters (and
+of a \code{\link[brms:categorical]{brms::categorical()}} reference model, the multilevel parameters (and
 therefore also their names) slightly differ from those in the \pkg{brms}
 reference model fit (see section "Augmented-data projection" in
 \code{\link[=extend_family]{extend_family()}}'s documentation).
diff --git a/man/as_draws_matrix.projection.Rd b/man/as_draws_matrix.projection.Rd
index 83001dcbe..774fe05b5 100644
--- a/man/as_draws_matrix.projection.Rd
+++ b/man/as_draws_matrix.projection.Rd
@@ -25,7 +25,7 @@ An \eqn{S_{\mathrm{prj}} \times Q}{S_prj x Q} \code{draws_matrix} (see
 weights, \code{\link[posterior:weight_draws]{posterior::weight_draws()}} is applied internally.
 }
 \description{
-These are the \code{\link[posterior:draws]{posterior::as_draws()}} and \code{\link[posterior:draws_matrix]{posterior::as_draws_matrix()}}
+These are the \code{\link[posterior:as_draws]{posterior::as_draws()}} and \code{\link[posterior:as_draws_matrix]{posterior::as_draws_matrix()}}
 methods for \code{projection} objects (returned by \code{\link[=project]{project()}}, possibly as
 elements of a \code{list}). They extract the projected parameter draws and return
 them as a \code{draws_matrix}. In case of different (i.e., nonconstant) weights
@@ -36,7 +36,7 @@ these weights (safer in contrast to the matrix returned by
 }
 \details{
 In case of the augmented-data projection for a multilevel submodel
-of a \code{\link[brms:brmsfamily]{brms::categorical()}} reference model, the multilevel parameters (and
+of a \code{\link[brms:categorical]{brms::categorical()}} reference model, the multilevel parameters (and
 therefore also their names) slightly differ from those in the \pkg{brms}
 reference model fit (see section "Augmented-data projection" in
 \code{\link[=extend_family]{extend_family()}}'s documentation).
diff --git a/man/cl_agg.Rd b/man/cl_agg.Rd
index 10ade5461..f4d34e2a3 100644
--- a/man/cl_agg.Rd
+++ b/man/cl_agg.Rd
@@ -17,7 +17,7 @@ cl_agg(
 
 \item{cl}{A numeric vector of length \eqn{S}, giving the cluster indices for
 the draws. The cluster indices need to be values from the set \eqn{\{1,
-  ..., S_{\mathrm{cl}}\}}{{1, ..., S_cl}}, except for draws that should be
+..., S_{\mathrm{cl}}\}}{{1, ..., S_cl}}, except for draws that should be
 dropped (e.g., by thinning), in which case \code{NA} needs to be provided at the
 positions of \code{cl} corresponding to these draws.}
 
diff --git a/man/cv_varsel.Rd b/man/cv_varsel.Rd
index 36e0a36b5..d26f8ee9c 100644
--- a/man/cv_varsel.Rd
+++ b/man/cv_varsel.Rd
@@ -142,7 +142,7 @@ those predictors have no cost and will therefore be selected first, whereas
 used for each predictor.}
 
 \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2, 3,
-  4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same
+4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same
 effect), indicating how much information (if any) to print out during the
 computations. Higher values indicate that more information should be
 printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose}
@@ -298,9 +298,9 @@ whether the Pareto-\eqn{\hat{k}} diagnostics may result in warnings. See
 \link[loo:loo-glossary]{loo::loo-glossary} for how to interpret the Pareto-\eqn{\hat{k}} values
 and the warning thresholds. \pkg{projpred} does not support the usually
 recommended moment-matching (see \code{\link[loo:loo_moment_match]{loo::loo_moment_match()}} and
-\code{\link[brms:loo_moment_match.brmsfit]{brms::loo_moment_match()}}), mixture importance sampling
+\code{\link[brms:loo_moment_match]{brms::loo_moment_match()}}), mixture importance sampling
 (\code{vignette("loo2-mixis", package="loo")}), or \code{reloo}-ing
-(\code{\link[brms:reloo.brmsfit]{brms::reloo()}}). If the reference model PSIS-LOO CV Pareto-\eqn{\hat{k}}
+(\code{\link[brms:reloo]{brms::reloo()}}). If the reference model PSIS-LOO CV Pareto-\eqn{\hat{k}}
 values are good, but there are high Pareto-\eqn{\hat{k}} values for the
 projected models, you can try increasing the number of draws used for the
 PSIS-LOO CV (\code{ndraws} in case of \code{refit_prj = FALSE}; \code{ndraws_pred} in case
@@ -335,7 +335,7 @@ the backends from packages \pkg{doParallel}, \pkg{doMPI}, or
 \pkg{doFuture}. For GLMs, this CV parallelization should work reliably, but
 for other models (such as GLMMs), it may lead to excessive memory usage
 which in turn may crash the R session (on Unix systems, setting an
-appropriate memory limit via \code{\link[unix:rlimit]{unix::rlimit_as()}} may avoid crashing the
+appropriate memory limit via \code{\link[unix:rlimit_as]{unix::rlimit_as()}} may avoid crashing the
 whole machine). However, the problem of excessive memory usage is less
 pronounced for the CV parallelization than for the projection
 parallelization described in \link{projpred-package}. In that regard, the CV
diff --git a/man/df_binom.Rd b/man/df_binom.Rd
index 44b4d3d1c..7f07f948b 100644
--- a/man/df_binom.Rd
+++ b/man/df_binom.Rd
@@ -15,7 +15,7 @@ A simulated classification dataset containing 100 observations.
 \url{https://web.stanford.edu/~hastie/glmnet/glmnetData/BNExample.RData}
 }
 \usage{
-df_binom
+data(df_binom)
 }
 \description{
 Binomial toy example
diff --git a/man/df_gaussian.Rd b/man/df_gaussian.Rd
index 5869bc1ae..e960f81e9 100644
--- a/man/df_gaussian.Rd
+++ b/man/df_gaussian.Rd
@@ -16,7 +16,7 @@ respectively.}
 \url{https://web.stanford.edu/~hastie/glmnet/glmnetData/QSExample.RData}
 }
 \usage{
-df_gaussian
+data(df_gaussian)
 }
 \description{
 Gaussian toy example
diff --git a/man/extend_family.Rd b/man/extend_family.Rd
index 47cbccc11..1dcd25c63 100644
--- a/man/extend_family.Rd
+++ b/man/extend_family.Rd
@@ -147,11 +147,11 @@ reference model is projected) currently have the following restrictions:
 constant with value 1).
 \item The thresholds are \code{"flexible"} (see \code{\link[brms:brmsfamily]{brms::brmsfamily()}}).
 \item The thresholds do not vary across the levels of a \code{factor}-like variable
-(see argument \code{gr} of \code{\link[brms:addition-terms]{brms::resp_thres()}}).
+(see argument \code{gr} of \code{\link[brms:resp_thres]{brms::resp_thres()}}).
 \item The \code{"probit_approx"} link is replaced by \code{"probit"}.
 }
 
-For the \code{\link[brms:brmsfamily]{brms::categorical()}} family, be aware that:
+For the \code{\link[brms:categorical]{brms::categorical()}} family, be aware that:
 \itemize{
 \item For multilevel submodels, the group-level effects are allowed to be
 correlated between different response categories.
diff --git a/man/mesquite.Rd b/man/mesquite.Rd
index a0c653d1f..d289a6c2f 100644
--- a/man/mesquite.Rd
+++ b/man/mesquite.Rd
@@ -23,7 +23,7 @@ group).}
 \url{https://sites.stat.columbia.edu/gelman/arm/examples/mesquite/mesquite.dat}
 }
 \usage{
-mesquite
+data(mesquite)
 }
 \description{
 The mesquite bushes yields dataset from Gelman and Hill (2006)
diff --git a/man/plot.cv_proportions.Rd b/man/plot.cv_proportions.Rd
index 2f61f5c9f..33e21aecf 100644
--- a/man/plot.cv_proportions.Rd
+++ b/man/plot.cv_proportions.Rd
@@ -20,7 +20,7 @@
 that \code{\link[=cv_proportions]{cv_proportions()}} will be applied to internally before then calling
 \code{\link[=plot.cv_proportions]{plot.cv_proportions()}}.}
 
-\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element]{ggplot2::element_text()}} for
+\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element_text]{ggplot2::element_text()}} for
 the y-axis tick labels. In case of long predictor names, \code{text_angle = 45}
 might be helpful (for example).}
 
diff --git a/man/plot.vsel.Rd b/man/plot.vsel.Rd
index 52f5de0c7..7bcf5e079 100644
--- a/man/plot.vsel.Rd
+++ b/man/plot.vsel.Rd
@@ -150,11 +150,11 @@ to be passed to \code{\link[=abbreviate]{abbreviate()}} in case of \code{ranking
 full-data predictor ranking and the corresponding ranking proportions are
 placed below the x-axis. By \code{"text"} or \code{"label"}, they are placed within
 the plotting area, using \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or
-\code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}}, respectively. See also argument
+\code{\link[ggrepel:geom_label_repel]{ggrepel::geom_label_repel()}}, respectively. See also argument
 \code{ranking_repel_args}.}
 
 \item{ranking_repel_args}{A \code{list} of arguments (except for \code{mapping}) to be
-passed to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} in
+passed to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or \code{\link[ggrepel:geom_label_repel]{ggrepel::geom_label_repel()}} in
 case of \code{ranking_repel = "text"} or \code{ranking_repel = "label"},
 respectively.}
 
@@ -177,9 +177,9 @@ or not (\code{FALSE}).}
 the ranking proportions given on the x-axis (below the full-data predictor
 ranking).}
 
-\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element]{ggplot2::element_text()}} for
+\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element_text]{ggplot2::element_text()}} for
 the x-axis tick labels. Note that the default of argument \code{angle} in
-\code{\link[ggplot2:element]{ggplot2::element_text()}} is \code{NULL} (which implies no rotation) whereas we
+\code{\link[ggplot2:element_text]{ggplot2::element_text()}} is \code{NULL} (which implies no rotation) whereas we
 use a default of \code{text_angle = 45} here. If \code{text_angle > 0} (\verb{< 0}), the
 x-axis text is automatically right-aligned (left-aligned). If \code{-90 < text_angle && text_angle < 90 && text_angle != 0}, the x-axis text is also
 top-aligned. When controlling \code{text_angle} via global option
diff --git a/man/pred-projection.Rd b/man/pred-projection.Rd
index 4a189e68b..687e3e4a8 100644
--- a/man/pred-projection.Rd
+++ b/man/pred-projection.Rd
@@ -245,7 +245,7 @@ In case of the latent projection and \code{transform = FALSE}:
 \itemize{
 \item Output element \code{pred} contains the linear predictors without any
 modifications that may be due to the original response distribution (e.g.,
-for a \code{\link[brms:brmsfamily]{brms::cumulative()}} model, the ordered thresholds are not taken into
+for a \code{\link[brms:cumulative]{brms::cumulative()}} model, the ordered thresholds are not taken into
 account).
 \item Output element \code{lpd} contains the \emph{latent} log predictive density values,
 i.e., those corresponding to the latent Gaussian distribution. If \code{newdata}
diff --git a/man/predict.refmodel.Rd b/man/predict.refmodel.Rd
index cde4d168f..f65d7bffe 100644
--- a/man/predict.refmodel.Rd
+++ b/man/predict.refmodel.Rd
@@ -50,7 +50,7 @@ spirit to argument \code{resp_oscale} from other functions: If (i)
 \code{is.null(ynew)}, then argument \code{type} affects the predictions as described
 above. In that case, note that \code{type = "link"} yields the linear predictors
 without any modifications that may be due to the original response
-distribution (e.g., for a \code{\link[brms:brmsfamily]{brms::cumulative()}} model, the ordered
+distribution (e.g., for a \code{\link[brms:cumulative]{brms::cumulative()}} model, the ordered
 thresholds are not taken into account). If (ii) \code{!is.null(ynew)}, then
 argument \code{type} also affects the scale of the log posterior predictive
 densities (\code{type = "response"} for the original response scale, \code{type = "link"} for the latent Gaussian scale).}
diff --git a/man/project.Rd b/man/project.Rd
index b8db71a7e..ccbccf35d 100644
--- a/man/project.Rd
+++ b/man/project.Rd
@@ -69,7 +69,7 @@ drawing new group-level effects when predicting from a multilevel submodel
 calculating output elements \code{dis} and \code{ce}.)}
 
 \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2\}}{{0, 1,
-  2}} (if \code{!is.null(predictor_terms)}, \eqn{1} and \eqn{2} have the same
+2}} (if \code{!is.null(predictor_terms)}, \eqn{1} and \eqn{2} have the same
 effect), indicating how much information (if any) to print out during the
 computations. Higher values indicate that more information should be
 printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose}
diff --git a/man/projpred-package.Rd b/man/projpred-package.Rd
index dd41d6673..ab5fc30a5 100644
--- a/man/projpred-package.Rd
+++ b/man/projpred-package.Rd
@@ -40,7 +40,7 @@ minimizers (in other words, these are the workhorse functions employed by
 \item Submodel without multilevel or additive terms:
 \itemize{
 \item For the traditional (or latent) projection (or the augmented-data
-projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:brmsfamily]{brms::bernoulli()}} family): An
+projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:bernoulli]{brms::bernoulli()}} family): An
 internal C++ function which basically serves the same purpose as \code{\link[=lm]{lm()}}
 for the \code{\link[=gaussian]{gaussian()}} family and \code{\link[=glm]{glm()}} for all other families. The
 returned object inherits from class \code{subfit}. Possible tuning parameters
@@ -58,21 +58,21 @@ value for the intercept at centered predictors; default: \code{0}), and
 \code{beta_init} (numeric vector giving the starting values for the regression
 coefficients; default: vector of \code{0}s).
 \item For the augmented-data projection: \code{\link[MASS:polr]{MASS::polr()}} (the returned object
-inherits from class \code{polr}) for the \code{\link[brms:brmsfamily]{brms::cumulative()}} family or
+inherits from class \code{polr}) for the \code{\link[brms:cumulative]{brms::cumulative()}} family or
 \code{\link[rstanarm:stan_polr]{rstanarm::stan_polr()}} fits, \code{\link[nnet:multinom]{nnet::multinom()}} (the returned object
-inherits from class \code{multinom}) for the \code{\link[brms:brmsfamily]{brms::categorical()}} family.
+inherits from class \code{multinom}) for the \code{\link[brms:categorical]{brms::categorical()}} family.
 }
 \item Submodel with multilevel but no additive terms:
 \itemize{
 \item For the traditional (or latent) projection (or the augmented-data
-projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:brmsfamily]{brms::bernoulli()}} family):
+projection in case of the \code{\link[=binomial]{binomial()}} or \code{\link[brms:bernoulli]{brms::bernoulli()}} family):
 \code{\link[lme4:lmer]{lme4::lmer()}} (the returned object inherits from class \code{lmerMod}) for
 the \code{\link[=gaussian]{gaussian()}} family, \code{\link[lme4:glmer]{lme4::glmer()}} (the returned object inherits
 from class \code{glmerMod}) for all other families.
 \item For the augmented-data projection: \code{\link[ordinal:clmm]{ordinal::clmm()}} (the returned
-object inherits from class \code{clmm}) for the \code{\link[brms:brmsfamily]{brms::cumulative()}} family,
+object inherits from class \code{clmm}) for the \code{\link[brms:cumulative]{brms::cumulative()}} family,
 \code{\link[mclogit:mblogit]{mclogit::mblogit()}} (the returned object inherits from class \code{mmblogit})
-for the \code{\link[brms:brmsfamily]{brms::categorical()}} family.
+for the \code{\link[brms:categorical]{brms::categorical()}} family.
 }
 \item Submodel without multilevel but additive terms: \code{\link[mgcv:gam]{mgcv::gam()}} (the returned
 object inherits from class \code{gam}).
@@ -119,7 +119,7 @@ projection if the submodel has no multilevel or additive predictor terms),
 but for all other types of submodels, the fitted submodel objects are quite
 big, which---when running in parallel---may lead to excessive memory usage
 which in turn may crash the R session (on Unix systems, setting an
-appropriate memory limit via \code{\link[unix:rlimit]{unix::rlimit_as()}} may avoid crashing the whole
+appropriate memory limit via \code{\link[unix:rlimit_as]{unix::rlimit_as()}} may avoid crashing the whole
 machine). Thus, we currently cannot recommend parallelizing projections onto
 submodels which are GLMs (in this context, the latent projection onto a
 submodel without multilevel and without additive terms may be regarded as a
@@ -238,6 +238,7 @@ Useful links:
 
 Authors:
 \itemize{
+  \item Osvaldo Martin \email{aloctavodia@gmail.com}
   \item Juho Piironen \email{juho.t.piironen@gmail.com}
   \item Markus Paasiniemi
   \item Alejandro Catalina \email{alecatfel@gmail.com}
diff --git a/man/refmodel-init-get.Rd b/man/refmodel-init-get.Rd
index ca562cb25..5e7cd663e 100644
--- a/man/refmodel-init-get.Rd
+++ b/man/refmodel-init-get.Rd
@@ -359,7 +359,7 @@ For the augmented-data projection, the response vector resulting from
 Note that response-specific offsets (i.e., one length-\eqn{N} offset vector
 per response category) are not supported by \pkg{projpred} yet. So far, only
 offsets which are the same across all response categories are supported. This
-is why in case of the \code{\link[brms:brmsfamily]{brms::categorical()}} family, offsets are currently not
+is why in case of the \code{\link[brms:categorical]{brms::categorical()}} family, offsets are currently not
 supported at all.
 
 Currently, \code{object = NULL} (i.e., a \code{datafit}; see section "Value") is not
diff --git a/man/varsel.Rd b/man/varsel.Rd
index 73786e44f..cac025862 100644
--- a/man/varsel.Rd
+++ b/man/varsel.Rd
@@ -92,7 +92,7 @@ supplied). Note that \code{nterms_max} does not count the intercept, so use
 does not count the intercept.)}
 
 \item{verbose}{A single integer value from the set \eqn{\{0, 1, 2, 3,
-  4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same
+4\}}{{0, 1, 2, 3, 4}} (for \code{\link[=varsel]{varsel()}}, \eqn{3} and \eqn{4} have the same
 effect), indicating how much information (if any) to print out during the
 computations. Higher values indicate that more information should be
 printed, \code{0} deactivates the verbose mode. Internally, argument \code{verbose}
diff --git a/tests/testthat/test_datafit.R b/tests/testthat/test_datafit.R
index d6e32df7c..4f56d499f 100644
--- a/tests/testthat/test_datafit.R
+++ b/tests/testthat/test_datafit.R
@@ -695,10 +695,17 @@ test_that(paste(
                           nterms = 0:nterms, refit_prj = FALSE)
 
     # compute the results for the Lasso
-    lasso <- glmnet::glmnet(x, y_glmnet,
-                            family = fam$family, weights = weights,
-                            lambda.min.ratio = lambda_min_ratio,
-                            nlambda = nlambda, thresh = 1e-12)
+    if (packageVersion("glmnet") < "5.0") {
+      lasso <- glmnet::glmnet(x, y_glmnet,
+                              family = fam$family, weights = weights,
+                              lambda.min.ratio = lambda_min_ratio,
+                              nlambda = nlambda, thresh = 1e-12)
+    } else {
+      lasso <- glmnet::glmnet(x, y_glmnet,
+                              family = fam$family, weights = weights,
+                              lambda.min.ratio = lambda_min_ratio,
+                              nlambda = nlambda, control = list(thresh = 1e-12))
+    }
     predictor_ranking <- predict(lasso, type = "nonzero", s = lasso$lambda)
     nselected <- sapply(predictor_ranking, function(e) length(e))
     lambdainds <- sapply(unique(nselected), function(nterms) {
diff --git a/tests/testthat/test_glm_elnet.R b/tests/testthat/test_glm_elnet.R
index 084d601c6..f720fcec9 100644
--- a/tests/testthat/test_glm_elnet.R
+++ b/tests/testthat/test_glm_elnet.R
@@ -87,14 +87,26 @@ test_that(paste(
                                 normalize = normalize, thresh = 1e-12,
                                 intercept = intercept
               )
-              fit2 <- glmnet::glmnet(x, y_glmnet,
-                                     family = fam$family, alpha = alpha,
-                                     lambda.min.ratio = lambda_min_ratio,
-                                     nlambda = nlam,
-                                     weights = w, offset = os,
-                                     standardize = normalize,
-                                     thresh = 1e-12, intercept = intercept
-              )
+              if (packageVersion("glmnet") < "5.0") {
+                fit2 <- glmnet::glmnet(x, y_glmnet,
+                                       family = fam$family, alpha = alpha,
+                                       lambda.min.ratio = lambda_min_ratio,
+                                       nlambda = nlam,
+                                       weights = w, offset = os,
+                                       standardize = normalize,
+                                       thresh = 1e-12, intercept = intercept
+                )
+              } else {
+                fit2 <- glmnet::glmnet(x, y_glmnet,
+                                       family = fam$family, alpha = alpha,
+                                       lambda.min.ratio = lambda_min_ratio,
+                                       nlambda = nlam,
+                                       weights = w, offset = os,
+                                       standardize = normalize,
+                                       control = list(thresh = 1e-12),
+                                       intercept = intercept
+                )
+              }
               ## check that with a given L1-norm, the coefficient values are the
               ## same (need to check it this way since the lambda values are not
               ## comparable between glm_elnet and glmnet)