diff --git a/src/common/dnmtools_gaussinv.cpp b/src/common/dnmtools_gaussinv.cpp index bae01391..4fc465b4 100644 --- a/src/common/dnmtools_gaussinv.cpp +++ b/src/common/dnmtools_gaussinv.cpp @@ -5,19 +5,19 @@ * * Copyright (C) 2002 Przemyslaw Sliwa and Jason H. Stover. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or (at - * your option) any later version. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* @@ -35,31 +35,30 @@ * * Copyright (C) 2002, 2004 Jason H. Stover. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or (at - * your option) any later version. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* - * Computes the cumulative distribution function for the Gaussian - * distribution using a rational function approximation. The - * computation is for the standard Normal distribution, i.e., mean 0 - * and standard deviation 1. If you want to compute Pr(X < t) for a - * Gaussian random variable X with non-zero mean m and standard - * deviation sd not equal to 1, find gsl_cdf_ugaussian ((t-m)/sd). - * This approximation is accurate to at least double precision. The - * accuracy was verified with a pari-gp script. The largest error - * found was about 1.4E-20. The coefficients were derived by Cody. + * Computes the cumulative distribution function for the Gaussian distribution + * using a rational function approximation. The computation is for the + * standard Normal distribution, i.e., mean 0 and standard deviation 1. If you + * want to compute Pr(X < t) for a Gaussian random variable X with non-zero + * mean m and standard deviation sd not equal to 1, find gsl_cdf_ugaussian + * ((t-m)/sd). This approximation is accurate to at least double + * precision. The accuracy was verified with a pari-gp script. The largest + * error found was about 1.4E-20. The coefficients were derived by Cody. * * References: * @@ -76,197 +75,169 @@ #include "dnmtools_gaussinv.hpp" +#include #include #include +#include +#include #include +#include -// ADS: they are all magic... - -// NOLINTBEGIN(*-avoid-magic-numbers,*-avoid-c-arrays,*-pointer-arithmetic,*-array-to-pointer-decay,*-constant-array-index) +// ADS: so much magic. +template [[nodiscard]] static inline double -rat_eval(const double a[], const size_t na, const double b[], const size_t nb, - const double x) { - double u = a[na - 1]; - - for (size_t i = na - 1; i > 0; i--) { - u = x * u + a[i - 1]; - } - - double v = b[nb - 1]; - - for (size_t j = nb - 1; j > 0; j--) { - v = x * v + b[j - 1]; - } - - return u / v; +rat_eval(const std::array &a, + const std::array &b, const double x) { + const auto acc = [&](const auto t, const auto y) { return x * t + y; }; + return std::accumulate(std::cbegin(a), std::cend(a), 0.0, acc) / + std::accumulate(std::cbegin(b), std::cend(b), 0.0, acc); } -static double -small(double q) { +[[nodiscard]] static double +small(const double q) { // clang-format off - const double a[8] = { - 3.387132872796366608, - 133.14166789178437745, - 1971.5909503065514427, - 13731.693765509461125, - 45921.953931549871457, - 67265.770927008700853, - 33430.575583588128105, + static constexpr auto a = std::array{ 2509.0809287301226727, + 33430.575583588128105, + 67265.770927008700853, + 45921.953931549871457, + 13731.693765509461125, + 1971.5909503065514427, + 133.14166789178437745, + 3.387132872796366608, }; - // clang-format on - - // clang-format off - const double b[8] = { - 1.0, - 42.313330701600911252, - 687.1870074920579083, - 5394.1960214247511077, - 21213.794301586595867, - 39307.89580009271061, - 28729.085735721942674, + static constexpr auto b = std::array{ 5226.495278852854561, + 28729.085735721942674, + 39307.89580009271061, + 21213.794301586595867, + 5394.1960214247511077, + 687.1870074920579083, + 42.313330701600911252, + 1.0, }; // clang-format on const double r = 0.180625 - q * q; - return q * rat_eval(a, 8, b, 8, r); + return q * rat_eval(a, b, r); } -static double -intermediate(double r) { +[[nodiscard]] static double +intermediate(const double r) { + static constexpr auto offset = 1.6; // clang-format off - const double a[] = { - 1.42343711074968357734, - 4.6303378461565452959, - 5.7694972214606914055, - 3.64784832476320460504, - 1.27045825245236838258, - 0.24178072517745061177, - 0.0227238449892691845833, + static constexpr auto a = std::array{ 7.7454501427834140764e-4, + 0.0227238449892691845833, + 0.24178072517745061177, + 1.27045825245236838258, + 3.64784832476320460504, + 5.7694972214606914055, + 4.6303378461565452959, + 1.42343711074968357734, }; - // clang-format on - - // clang-format off - const double b[] = { - 1.0, - 2.05319162663775882187, - 1.6763848301838038494, - 0.68976733498510000455, - 0.14810397642748007459, - 0.0151986665636164571966, - 5.475938084995344946e-4, + static constexpr auto b = std::array{ 1.05075007164441684324e-9, + 5.475938084995344946e-4, + 0.0151986665636164571966, + 0.14810397642748007459, + 0.68976733498510000455, + 1.6763848301838038494, + 2.05319162663775882187, + 1.0, }; // clang-format on - return rat_eval(a, 8, b, 8, (r - 1.6)); + return rat_eval(a, b, r - offset); } -static double -tail(double r) { +[[nodiscard]] static double +tail(const double r) { + static constexpr auto offset = 0.5; // clang-format off - const double a[] = { - 6.6579046435011037772, - 5.4637849111641143699, - 1.7848265399172913358, - 0.29656057182850489123, - 0.026532189526576123093, - 0.0012426609473880784386, - 2.71155556874348757815e-5, + static constexpr auto a = std::array{ 2.01033439929228813265e-7, + 2.71155556874348757815e-5, + 0.0012426609473880784386, + 0.026532189526576123093, + 0.29656057182850489123, + 1.7848265399172913358, + 5.4637849111641143699, + 6.6579046435011037772, }; - // clang-format on - - // clang-format off - const double b[] = { - 1.0, - 0.59983220655588793769, - 0.13692988092273580531, - 0.0148753612908506148525, - 7.868691311456132591e-4, - 1.8463183175100546818e-5, - 1.4215117583164458887e-7, + static constexpr auto b = std::array{ 2.04426310338993978564e-15, + 1.4215117583164458887e-7, + 1.8463183175100546818e-5, + 7.868691311456132591e-4, + 0.0148753612908506148525, + 0.13692988092273580531, + 0.59983220655588793769, + 1.0, }; // clang-format on - return rat_eval(a, 8, b, 8, (r - 5.0)); + return rat_eval(a, b, r - offset); } -double +[[nodiscard]] double dnmt_gsl_cdf_ugaussian_Pinv(const double P) { - const double dP = P - 0.5; - + static constexpr auto cutoff_for_small = 0.425; + const auto dP = P - 0.5; if (P == 1.0) return std::numeric_limits::infinity(); - else if (P == 0.0) + if (P == 0.0) return -std::numeric_limits::infinity(); - - if (fabs(dP) <= 0.425) + if (std::abs(dP) <= cutoff_for_small) return small(dP); - - const double pp = (P < 0.5) ? P : 1.0 - P; - - const double r = std::sqrt(-log(pp)); - + const double pp = P < 0.5 ? P : 1.0 - P; + const double r = std::sqrt(-std::log(pp)); const double x = (r <= 5.0) ? intermediate(r) : tail(r); - - return (P < 0.5) ? -x : x; + return P < 0.5 ? -x : x; // NOLINT(*-avoid-magic-numbers) } -double +[[nodiscard]] double dnmt_gsl_cdf_ugaussian_Qinv(const double Q) { + static constexpr auto cutoff_for_small = 0.425; const double dQ = Q - 0.5; - - if (Q == 1.0) { + if (Q == 1.0) return -std::numeric_limits::infinity(); - } - else if (Q == 0.0) { + if (Q == 0.0) return std::numeric_limits::infinity(); - } - - if (fabs(dQ) <= 0.425) + if (std::abs(dQ) <= cutoff_for_small) return -small(dQ); - - const double pp = (Q < 0.5) ? Q : 1.0 - Q; - - const double r = std::sqrt(-log(pp)); - + const double pp = Q < 0.5 ? Q : 1.0 - Q; + const double r = std::sqrt(-std::log(pp)); const double x = (r <= 5.0) ? intermediate(r) : tail(r); - - return (Q < 0.5) ? x : -x; + return Q < 0.5 ? x : -x; // NOLINT(*-avoid-magic-numbers) } -double +[[nodiscard]] double dnmt_gsl_cdf_gaussian_Pinv(const double P, const double sigma) { return sigma * dnmt_gsl_cdf_ugaussian_Pinv(P); } -double +[[nodiscard]] double dnmt_gsl_cdf_gaussian_Qinv(const double Q, const double sigma) { return sigma * dnmt_gsl_cdf_ugaussian_Qinv(Q); } // ADS: from gsl/gsl_math.h: -static constexpr const double dnmt_M_2_SQRTPI = - 1.12837916709551257389615890312; /* 2/sqrt(pi) */ -static constexpr const double dnmt_M_SQRT1_2 = - 0.70710678118654752440084436210; /* sqrt(1/2) */ -static constexpr const double dnmt_M_1_SQRT2PI = - dnmt_M_2_SQRTPI * dnmt_M_SQRT1_2 / 2.0; -static constexpr const double dnmt_M_SQRT2 = - 1.41421356237309504880168872421; /* sqrt(2) */ -static constexpr const double dnmt_SQRT32 = (4.0 * dnmt_M_SQRT2); + +// clang-format off +static constexpr const double dnmt_M_2_SQRTPI = 1.12837916709551257389615890312; // 2/sqrt(pi) +static constexpr const double dnmt_M_SQRT1_2 = 0.70710678118654752440084436210; // sqrt(1/2) +static constexpr const double dnmt_M_1_SQRT2PI = dnmt_M_2_SQRTPI * dnmt_M_SQRT1_2 / 2.0; +static constexpr const double dnmt_M_SQRT2 = 1.41421356237309504880168872421; // sqrt(2) +static constexpr const double dnmt_SQRT32 = 4.0 * dnmt_M_SQRT2; +// clang-format on /* * IEEE double precision dependent constants. * - * GAUSS_EPSILON: Smallest positive value such that - * gsl_cdf_gaussian(x) > 0.5. + * GAUSS_EPSILON: Smallest positive value such that gsl_cdf_gaussian(x) > 0.5. * GAUSS_XUPPER: Largest value x such that gsl_cdf_gaussian(x) < 1.0. * GAUSS_XLOWER: Smallest value x such that gsl_cdf_gaussian(x) > 0.0. */ @@ -277,189 +248,178 @@ static constexpr const double dnmt_GAUSS_XUPPER = 8.572; static constexpr const double dnmt_GAUSS_XLOWER = -37.519; static constexpr const double dnmt_GAUSS_SCALE = 16.0; -static double -get_del(double x, double rational) { - const double xsq = floor(x * dnmt_GAUSS_SCALE) / dnmt_GAUSS_SCALE; - double del = (x - xsq) * (x + xsq); - del *= 0.5; - return exp(-0.5 * xsq * xsq) * exp(-1.0 * del) * rational; +[[nodiscard]] static double +get_del(const double x, const double rational) { + static constexpr auto coeff = -0.5; + const double xsq = std::floor(x * dnmt_GAUSS_SCALE) / dnmt_GAUSS_SCALE; + const double del = (x - xsq) * (x + xsq); + return std::exp(coeff * (xsq * xsq + del)) * rational; } -/* - * Normal cdf for fabs(x) < 0.66291 - */ -static double +// Normal cdf for fabs(x) < 0.66291 +[[nodiscard]] static double gauss_small(const double x) { - double xsq{}; - double xnum{}; - double xden{}; - - const double a[5] = {2.2352520354606839287, 161.02823106855587881, - 1067.6894854603709582, 18154.981253343561249, - 0.065682337918207449113}; - const double b[4] = {47.20258190468824187, 976.09855173777669322, - 10260.932208618978205, 45507.789335026729956}; - - xsq = x * x; - xnum = a[4] * xsq; - xden = xsq; - - for (unsigned int i = 0; i < 3; i++) { - xnum = (xnum + a[i]) * xsq; - xden = (xden + b[i]) * xsq; + // clang-format off + static constexpr auto a_last = 4; + static constexpr auto a = std::array{ + 2.2352520354606839287, + 161.02823106855587881, + 1067.6894854603709582, + 18154.981253343561249, + 0.065682337918207449113, + }; + static constexpr auto b_last = 3; + static constexpr auto b = std::array{ + 47.20258190468824187, + 976.09855173777669322, + 10260.932208618978205, + 45507.789335026729956, + }; + // clang-format on + + const double xsq = x * x; + double xnum = a[a_last] * xsq; + double xden = xsq; + for (std::uint32_t i = 0; i < b_last; i++) { + xnum = (xnum + a[i]) * xsq; // NOLINT(*-constant-array-index) + xden = (xden + b[i]) * xsq; // NOLINT(*-constant-array-index) } - return x * (xnum + a[3]) / (xden + b[3]); + return x * (xnum + a[b_last]) / (xden + b[b_last]); } -/* - * Normal cdf for 0.66291 < fabs(x) < sqrt(32). - */ -static double +// Normal cdf for 0.66291 < fabs(x) < sqrt(32). +[[nodiscard]] static double gauss_medium(const double x) { - const double c[9] = { - 0.39894151208813466764, 8.8831497943883759412, 93.506656132177855979, - 597.27027639480026226, 2494.5375852903726711, 6848.1904505362823326, - 11602.651437647350124, 9842.7148383839780218, 1.0765576773720192317e-8}; - const double d[8] = {22.266688044328115691, 235.38790178262499861, - 1519.377599407554805, 6485.558298266760755, - 18615.571640885098091, 34900.952721145977266, - 38912.003286093271411, 19685.429676859990727}; - - const double absx = fabs(x); - - double xnum = c[8] * absx; + static constexpr auto c_last = 8; + static constexpr auto d_last = 7; + // clang-format off + static constexpr auto c = std::array{ + 0.39894151208813466764, + 8.8831497943883759412, + 93.506656132177855979, + 597.27027639480026226, + 2494.5375852903726711, + 6848.1904505362823326, + 11602.651437647350124, + 9842.7148383839780218, + 1.0765576773720192317e-8, + }; + static constexpr auto d = std::array{ + 22.266688044328115691, + 235.38790178262499861, + 1519.377599407554805, + 6485.558298266760755, + 18615.571640885098091, + 34900.952721145977266, + 38912.003286093271411, + 19685.429676859990727, + }; + // clang-format on + const double absx = std::abs(x); + + double xnum = c[c_last] * absx; double xden = absx; - for (unsigned int i = 0; i < 7; i++) { - xnum = (xnum + c[i]) * absx; - xden = (xden + d[i]) * absx; + for (unsigned int i = 0; i < d_last; i++) { + xnum = (xnum + c[i]) * absx; // NOLINT(*-constant-array-index) + xden = (xden + d[i]) * absx; // NOLINT(*-constant-array-index) } - const double temp = (xnum + c[7]) / (xden + d[7]); + const double temp = (xnum + c[d_last]) / (xden + d[d_last]); return get_del(x, temp); } -/* - * Normal cdf for - * {sqrt(32) < x < GAUSS_XUPPER} union { dnmt_GAUSS_XLOWER < x < -sqrt(32) }. - */ -static double +// Normal cdf for +// { sqrt(32) < x < GAUSS_XUPPER } union { GAUSS_XLOWER < x < -sqrt(32) } +[[nodiscard]] static double gauss_large(const double x) { - const double p[6] = {0.21589853405795699, 0.1274011611602473639, - 0.022235277870649807, 0.001421619193227893466, - 2.9112874951168792e-5, 0.02307344176494017303}; - const double q[5] = {1.28426009614491121, 0.468238212480865118, - 0.0659881378689285515, 0.00378239633202758244, - 7.29751555083966205e-5}; - - const double absx = fabs(x); + static constexpr auto p_last = 5; + static constexpr auto q_last = 4; + // clang-format off + static constexpr auto p = std::array{ + 0.21589853405795699, + 0.1274011611602473639, + 0.022235277870649807, + 0.001421619193227893466, + 2.9112874951168792e-5, + 0.02307344176494017303, + }; + static constexpr auto q = std::array{ + 1.28426009614491121, + 0.468238212480865118, + 0.0659881378689285515, + 0.00378239633202758244, + 7.29751555083966205e-5, + }; + // clang-format on + + const double absx = std::abs(x); const double xsq = 1.0 / (x * x); - double xnum = p[5] * xsq; + double xnum = p[p_last] * xsq; double xden = xsq; - for (int i = 0; i < 4; i++) { - xnum = (xnum + p[i]) * xsq; - xden = (xden + q[i]) * xsq; + for (int i = 0; i < q_last; i++) { + xnum = (xnum + p[i]) * xsq; // NOLINT(*-constant-array-index) + xden = (xden + q[i]) * xsq; // NOLINT(*-constant-array-index) } - double temp = xsq * (xnum + p[4]) / (xden + q[4]); + double temp = xsq * (xnum + p[q_last]) / (xden + q[q_last]); temp = (dnmt_M_1_SQRT2PI - temp) / absx; return get_del(x, temp); } -double +[[nodiscard]] double dnmt_gsl_cdf_ugaussian_P(const double x) { - double result = 0.0; - const double absx = fabs(x); - - if (absx < dnmt_GAUSS_EPSILON) { - return 0.5; - } - else if (absx < 0.66291) { - return 0.5 + gauss_small(x); + static constexpr auto absx_cutoff = 0.66291; + const double absx = std::abs(x); + if (absx < dnmt_GAUSS_EPSILON) + return 0.5; // NOLINT(*-avoid-magic-numbers) + if (absx < absx_cutoff) + return 0.5 + gauss_small(x); // NOLINT(*-avoid-magic-numbers) + if (absx < dnmt_SQRT32) { + const auto result = gauss_medium(x); + return x > 0.0 ? 1.0 - result : result; } - else if (absx < dnmt_SQRT32) { - result = gauss_medium(x); - - if (x > 0.0) { - result = 1.0 - result; - } - - return result; - } - else if (x > dnmt_GAUSS_XUPPER) { + if (x > dnmt_GAUSS_XUPPER) return 1.0; - } - else if (x < dnmt_GAUSS_XLOWER) { + if (x < dnmt_GAUSS_XLOWER) return 0.0; - } - else { - result = gauss_large(x); - - if (x > 0.0) { - result = 1.0 - result; - } - } - - return result; + const auto result = gauss_large(x); + return x > 0.0 ? 1.0 - result : result; } -double +[[nodiscard]] double dnmt_gsl_cdf_ugaussian_Q(const double x) { - double result = 0.0; - const double absx = fabs(x); - - if (absx < dnmt_GAUSS_EPSILON) { - return 0.5; + static constexpr auto absx_cutoff = 0.66291; + const double absx = std::abs(x); + if (absx < dnmt_GAUSS_EPSILON) + return 0.5; // NOLINT(*-avoid-magic-numbers) + if (absx < absx_cutoff) { + const auto result = gauss_small(x); + // NOLINTNEXTLINE(*-avoid-magic-numbers) + return x < 0.0 ? std::abs(result) + 0.5 : 0.5 - result; } - else if (absx < 0.66291) { - result = gauss_small(x); - - if (x < 0.0) { - result = fabs(result) + 0.5; - } - else { - result = 0.5 - result; - } - - return result; + if (absx < dnmt_SQRT32) { + const auto result = gauss_medium(x); + return x < 0.0 ? 1.0 - result : result; } - else if (absx < dnmt_SQRT32) { - result = gauss_medium(x); - - if (x < 0.0) { - result = 1.0 - result; - } - - return result; - } - else if (x > -(dnmt_GAUSS_XLOWER)) { + if (x > -dnmt_GAUSS_XLOWER) return 0.0; - } - else if (x < -(dnmt_GAUSS_XUPPER)) { + if (x < -dnmt_GAUSS_XUPPER) return 1.0; - } - else { - result = gauss_large(x); - - if (x < 0.0) { - result = 1.0 - result; - } - } - return result; + const auto result = gauss_large(x); + return x < 0.0 ? 1.0 - result : result; } -double +[[nodiscard]] double dnmt_gsl_cdf_gaussian_P(const double x, const double sigma) { return dnmt_gsl_cdf_ugaussian_P(x / sigma); } -double +[[nodiscard]] double dnmt_gsl_cdf_gaussian_Q(const double x, const double sigma) { return dnmt_gsl_cdf_ugaussian_Q(x / sigma); } - -// NOLINTEND(*-avoid-magic-numbers,*-avoid-c-arrays,*-pointer-arithmetic,*-array-to-pointer-decay,*-constant-array-index) diff --git a/src/common/dnmtools_gaussinv.hpp b/src/common/dnmtools_gaussinv.hpp index d980d3f1..ffcddcd1 100644 --- a/src/common/dnmtools_gaussinv.hpp +++ b/src/common/dnmtools_gaussinv.hpp @@ -1,29 +1,26 @@ -/* Code from GSl, see copyright below. - */ - -/* cdf/gsl_cdf.h +/* Copyright (C) 2025 Andrew D Smith * - * Copyright (C) 2002 Jason H. Stover. + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or (at - * your option) any later version. + * This is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * You should have received a copy of the GNU General Public License along + * with this software; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -/* Author: J. Stover */ - -double dnmt_gsl_cdf_ugaussian_Pinv(const double P); -double dnmt_gsl_cdf_ugaussian_Qinv(const double Q); +[[nodiscard]] double +dnmt_gsl_cdf_ugaussian_Pinv(const double P); +[[nodiscard]] double +dnmt_gsl_cdf_ugaussian_Qinv(const double Q); -double dnmt_gsl_cdf_gaussian_P(const double x, const double sigma); -double dnmt_gsl_cdf_gaussian_Q(const double x, const double sigma); +[[nodiscard]] double +dnmt_gsl_cdf_gaussian_P(const double x, const double sigma); +[[nodiscard]] double +dnmt_gsl_cdf_gaussian_Q(const double x, const double sigma); diff --git a/src/utils/unxcounts.cpp b/src/utils/unxcounts.cpp index 90447d21..f6404f20 100644 --- a/src/utils/unxcounts.cpp +++ b/src/utils/unxcounts.cpp @@ -1,21 +1,21 @@ -/* unxcounts: reverse the process of xcounts and generate the counts - * file, including sites not covered. +/* Copyright (C) 2023 Andrew D. Smith * - * Copyright (C) 2023 Andrew D. Smith + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. * - * Authors: Andrew D. Smith - * - * This program is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. */ +[[maybe_unused]] static constexpr auto about = R"( +unxcounts: reverse the process of xcounts and generate the counts file, +including sites not covered. +)"; + #include "OptionParser.hpp" #include "bsutils.hpp" #include "counts_header.hpp" @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -45,7 +46,7 @@ #include #include -// NOLINTBEGIN(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic) +// NOLINTBEGIN(*-narrowing-conversions) static void read_fasta_file_short_names_uppercase(const std::string &chroms_file, @@ -55,8 +56,8 @@ read_fasta_file_short_names_uppercase(const std::string &chroms_file, names.clear(); read_fasta_file_short_names(chroms_file, names, chroms); for (auto &i : chroms) - transform(std::cbegin(i), std::cend(i), begin(i), - [](const char c) { return std::toupper(c); }); + std::transform(std::cbegin(i), std::cend(i), begin(i), + [](const char c) { return std::toupper(c); }); } static void @@ -83,7 +84,7 @@ verify_chrom_orders( throw std::runtime_error("failed to acquire buffer"); while (bamxx::getline(in, line)) { - if (std::isdigit(line.s[0])) + if (std::isdigit(line.s[0])) // NOLINT(*-pointer-arithmetic) continue; if (is_counts_header_line(line.s)) continue; @@ -110,7 +111,7 @@ verify_chrom_orders( std::cerr << "chrom orders are consistent" << "\n"; } -static const char *tag_values[] = { +static constexpr auto tag_values = std::array{ "CpG", // 0 "CHH", // 1 "CXG", // 2 @@ -118,12 +119,14 @@ static const char *tag_values[] = { "N" // 4 }; -static const int tag_sizes[] = {3, 3, 3, 3, 1}; +static constexpr auto tag_sizes = std::array{ + 3, 3, 3, 3, 1, +}; -// ADS: the values below allow for things like CHH where the is a N in -// the triplet; I'm allowing that for consistency with the weird logic -// from earlier versions. -const std::uint32_t context_codes[] = { +// ADS: the values below allow for things like CHH where the is a N in the +// triplet; I'm allowing that for consistency with the weird logic from +// earlier versions. +static constexpr auto context_codes = std::array{ /*CAA CHH*/ 1, /*CAC CHH*/ 1, /*CAG CXG*/ 2, @@ -154,14 +157,14 @@ const std::uint32_t context_codes[] = { static inline std::uint32_t get_tag_from_genome_c(const std::string &s, const size_t pos) { const auto val = base2int(s[pos + 1]) * 5 + base2int(s[pos + 2]); - return context_codes[val]; + return context_codes[val]; // NOLINT(*-constant-array-index) } static inline std::uint32_t get_tag_from_genome_g(const std::string &s, const size_t pos) { const auto val = base2int(complement(s[pos - 1])) * 5 + base2int(complement(s[pos - 2])); - return context_codes[val]; + return context_codes[val]; // NOLINT(*-constant-array-index) } static bool @@ -169,11 +172,13 @@ write_missing(const std::uint32_t name_size, const std::string &chrom, const std::uint64_t start_pos, const std::uint64_t end_pos, std::vector &buf, bamxx::bgzf_file &out) { static constexpr auto zeros = "\t0\t0\n"; + static constexpr auto zeros_sz = 5; static constexpr auto pos_strand = "\t+\t"; static constexpr auto neg_strand = "\t-\t"; - const auto buf_end = buf.data() + size(buf); + const auto buf_end = + buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic) // chrom name is already in the buffer so move past it - auto cursor = buf.data() + name_size + 1; + auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic) for (auto pos = start_pos; pos < end_pos; ++pos) { const char base = chrom[pos]; if (is_cytosine(base) || is_guanine(base)) { @@ -182,10 +187,12 @@ write_missing(const std::uint32_t name_size, const std::string &chrom, : get_tag_from_genome_g(chrom, pos); #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wstringop-overflow=0" + // NOLINTBEGIN(*-constant-array-index) auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); ptr = std::copy_n(is_c ? pos_strand : neg_strand, 3, ptr); ptr = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], ptr); - ptr = std::copy_n(zeros, 5, ptr); + ptr = std::copy_n(zeros, zeros_sz, ptr); + // NOLINTEND(*-constant-array-index) const auto sz = std::distance(buf.data(), ptr); #pragma GCC diagnostic push @@ -201,21 +208,26 @@ write_missing_cpg(const std::uint32_t &name_size, const std::string &chrom, const std::uint64_t start_pos, const std::uint64_t end_pos, std::vector &buf, bamxx::bgzf_file &out) { static constexpr auto zeros = "\t0\t0\n"; + static constexpr auto zeros_sz = 5; static constexpr auto pos_strand = "\t+\t"; - const auto buf_end = buf.data() + size(buf); + static constexpr auto pos_strand_sz = 3; + const auto buf_end = + buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic) // chrom name is already in the buffer so move past it - auto cursor = buf.data() + name_size + 1; + auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic) for (auto pos = start_pos; pos < end_pos - 1; ++pos) { - // When this function is called, the "end_pos" is either the chrom - // size or the position of a base known to be a C. So we never - // have to allow pos+1 to equal end_pos. + // When this function is called, the "end_pos" is either the chrom size or + // the position of a base known to be a C. So we never have to allow pos+1 + // to equal end_pos. if (is_cytosine(chrom[pos]) && is_guanine(chrom[pos + 1])) { #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wstringop-overflow=0" + // NOLINTBEGIN(*-constant-array-index) auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); - ptr = std::copy_n(pos_strand, 3, ptr); + ptr = std::copy_n(pos_strand, pos_strand_sz, ptr); ptr = std::copy_n("CpG", 3, ptr); - ptr = std::copy_n(zeros, 5, ptr); + ptr = std::copy_n(zeros, zeros_sz, ptr); + // NOLINTEND(*-constant-array-index) const auto sz = std::distance(buf.data(), ptr); #pragma GCC diagnostic push if (bgzf_write(out.f, buf.data(), sz) != sz) @@ -233,8 +245,11 @@ write_site(const std::uint32_t name_size, const std::string &chrom, static constexpr auto pos_strand = "\t+\t"; static constexpr auto neg_strand = "\t-\t"; static constexpr auto fmt = std::chars_format::general; + // use default precision, 6, same as std::cout default + static constexpr auto precision = 6; - const auto buf_end = buf.data() + size(buf); + const auto buf_end = + buf.data() + std::size(buf); // NOLINT(*-pointer-arithmetic) const char base = chrom[pos]; assert(is_cytosine(base) || is_guanine(base)); const bool is_c = is_cytosine(base); @@ -246,17 +261,17 @@ write_site(const std::uint32_t name_size, const std::string &chrom, #pragma GCC diagnostic push #pragma GCC diagnostic error "-Wstringop-overflow=0" // chrom name is already in the buffer so move past it - auto cursor = buf.data() + name_size + 1; + auto cursor = buf.data() + name_size + 1; // NOLINT(*-pointer-arithmetic) { auto [ptr, ec] = std::to_chars(cursor, buf_end, pos); cursor = ptr; } cursor = std::copy_n(is_c ? pos_strand : neg_strand, 3, cursor); + // NOLINTNEXTLINE(*-constant-array-index) cursor = std::copy_n(tag_values[the_tag], tag_sizes[the_tag], cursor); *cursor++ = '\t'; { - // use default precision, 6, same as std::cout default - auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, 6); + auto [ptr, ec] = std::to_chars(cursor, buf_end, meth, fmt, precision); cursor = ptr; } *cursor++ = '\t'; @@ -318,11 +333,11 @@ get_lookups(const std::vector &names, std::vector &chrom_sizes) { chrom_lookup.clear(); name_to_id.clear(); - chrom_sizes = std::vector(size(chroms), 0); - for (size_t i = 0; i < size(chroms); ++i) { + chrom_sizes = std::vector(std::size(chroms), 0); + for (size_t i = 0; i < std::size(chroms); ++i) { chrom_lookup[names[i]] = std::cbegin(chroms) + i; name_to_id[names[i]] = i; - chrom_sizes[i] = size(chroms[i]); + chrom_sizes[i] = std::size(chroms[i]); } } @@ -332,7 +347,8 @@ process_header_line( const std::vector &chrom_sizes, const kstring_t &line, bamxx::bgzf_file &out) { std::string hdr_line{line.s}; - if (size(hdr_line) > 1 && !verify_chrom(hdr_line, name_to_id, chrom_sizes)) + if (std::size(hdr_line) > 1 && + !verify_chrom(hdr_line, name_to_id, chrom_sizes)) throw std::runtime_error{"failed to verify header for: " + hdr_line}; if (!write_counts_header_line(hdr_line, out)) throw std::runtime_error{"failed to write header line: " + hdr_line}; @@ -351,7 +367,8 @@ write_all_sites(const bool verbose, const std::uint32_t prev_chr_id, auto res = std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data()); *res = '\t'; - write_missing(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out); + write_missing(std::size(names[i]), chroms[i], 0u, std::size(chroms[i]), buf, + out); } } @@ -364,11 +381,11 @@ process_sites(const bool verbose, const bool add_missing_chroms, std::vector chroms, names; read_fasta_file_short_names_uppercase(chroms_file, names, chroms); if (verbose) - std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n"; + std::cerr << "[n chroms in reference: " << std::size(chroms) << "]" << "\n"; std::unordered_map chrom_lookup; std::unordered_map name_to_id; - std::vector chrom_sizes(size(chroms), 0); + std::vector chrom_sizes(std::size(chroms), 0); get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes); if (add_missing_chroms) @@ -412,15 +429,16 @@ process_sites(const bool verbose, const bool add_missing_chroms, while (getline(in, line)) { if (is_counts_header_line(line.s)) { process_header_line(name_to_id, chrom_sizes, line, out); - continue; // ADS: early loop exit + continue; // ADS: just skip headers } - if (!std::isdigit(line.s[0])) { // check if we have a chrom line + // check if we have a chrom line + if (!std::isdigit(line.s[0])) { // NOLINT(*-pointer-arithmetic) if (!require_covered && pos != std::numeric_limits::max()) - write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); + write_missing(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out); chrom_name = std::string{line.s}; - nm_sz = size(chrom_name); + nm_sz = std::size(chrom_name); const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name); if (add_missing_chroms) @@ -438,10 +456,12 @@ process_sites(const bool verbose, const bool add_missing_chroms, } else { std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; + // NOLINTBEGIN(*-pointer-arithmetic) const auto end_line = line.s + line.l; auto res = std::from_chars(line.s, end_line, pos_step); res = std::from_chars(res.ptr + 1, end_line, n_meth); res = std::from_chars(res.ptr + 1, end_line, n_unmeth); + // NOLINTEND(*-pointer-arithmetic) const auto curr_pos = pos + pos_step; if (!require_covered && pos + 1 < curr_pos) @@ -452,9 +472,9 @@ process_sites(const bool verbose, const bool add_missing_chroms, } } if (!require_covered) - write_missing(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); + write_missing(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out); if (add_missing_chroms) - write_all_sites(verbose, prev_chr_id, size(chroms), names, chroms, buf, + write_all_sites(verbose, prev_chr_id, std::size(chroms), names, chroms, buf, out); } @@ -471,7 +491,8 @@ write_all_cpgs(const bool verbose, const std::uint32_t prev_chr_id, auto res = std::copy(std::cbegin(names[i]), std::cend(names[i]), buf.data()); *res = '\t'; - write_missing_cpg(size(names[i]), chroms[i], 0u, size(chroms[i]), buf, out); + write_missing_cpg(std::size(names[i]), chroms[i], 0u, std::size(chroms[i]), + buf, out); } } @@ -484,11 +505,11 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, std::vector chroms, names; read_fasta_file_short_names_uppercase(chroms_file, names, chroms); if (verbose) - std::cerr << "[n chroms in reference: " << chroms.size() << "]" << "\n"; + std::cerr << "[n chroms in reference: " << std::size(chroms) << "]" << "\n"; std::unordered_map chrom_lookup; std::unordered_map name_to_id; - std::vector chrom_sizes(size(chroms), 0); + std::vector chrom_sizes(std::size(chroms), 0); get_lookups(names, chroms, chrom_lookup, name_to_id, chrom_sizes); if (add_missing_chroms) @@ -535,12 +556,14 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, continue; // ADS: early loop exit } - if (!std::isdigit(line.s[0])) { // check if we have a chrom line + // check if we have a chrom line + if (!std::isdigit(line.s[0])) { // NOLINT(*-pointer-arithmetic) if (!require_covered && pos != std::numeric_limits::max()) - write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); + write_missing_cpg(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, + out); chrom_name = std::string{line.s}; - nm_sz = size(chrom_name); + nm_sz = std::size(chrom_name); const std::int32_t chr_id = get_chrom_id(name_to_id, chrom_name); if (add_missing_chroms) @@ -558,10 +581,12 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, } else { std::uint32_t pos_step = 0, n_meth = 0, n_unmeth = 0; + // NOLINTBEGIN(*-pointer-arithmetic) const auto end_line = line.s + line.l; auto res = std::from_chars(line.s, end_line, pos_step); res = std::from_chars(res.ptr + 1, end_line, n_meth); res = std::from_chars(res.ptr + 1, end_line, n_unmeth); + // NOLINTEND(*-pointer-arithmetic) const auto curr_pos = pos + pos_step; if (!require_covered && pos + 1 < curr_pos) @@ -572,9 +597,10 @@ process_cpg_sites(const bool verbose, const bool add_missing_chroms, } } if (!require_covered) - write_missing_cpg(nm_sz, *ch_itr, pos + 1, size(*ch_itr), buf, out); + write_missing_cpg(nm_sz, *ch_itr, pos + 1, std::size(*ch_itr), buf, out); if (add_missing_chroms) - write_all_cpgs(verbose, prev_chr_id, size(chroms), names, chroms, buf, out); + write_all_cpgs(verbose, prev_chr_id, std::size(chroms), names, chroms, buf, + out); } int @@ -622,7 +648,7 @@ main_unxcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) std::cerr << opt_parse.option_missing_message() << "\n"; return EXIT_SUCCESS; } - if (leftover_args.size() != 1) { + if (std::size(leftover_args) != 1) { std::cerr << opt_parse.help_message() << "\n"; return EXIT_SUCCESS; } @@ -653,4 +679,4 @@ main_unxcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) return EXIT_SUCCESS; } -// NOLINTEND(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic) +// NOLINTEND(*-narrowing-conversions) diff --git a/src/utils/xcounts.cpp b/src/utils/xcounts.cpp index 42e7e3fa..0f9e2e97 100644 --- a/src/utils/xcounts.cpp +++ b/src/utils/xcounts.cpp @@ -1,9 +1,4 @@ -/* xcounts: reformat counts so they only give the m and u counts in a dynamic - * step wig format - * - * Copyright (C) 2023 Andrew D. Smith - * - * Authors: Andrew D. Smith +/* Copyright (C) 2023 Andrew D. Smith * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -16,6 +11,11 @@ * more details. */ +[[maybe_unused]] static constexpr auto about = R"( +xcounts: reformat counts so they only give the m and u counts in a dynamic +step wig format +)"; + #include "MSite.hpp" #include "OptionParser.hpp" #include "counts_header.hpp" @@ -40,7 +40,7 @@ #include #include -// NOLINTBEGIN(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic) +// NOLINTBEGIN(*-narrowing-conversions) enum class xcounts_err : std::uint8_t { // clang-format off @@ -61,6 +61,7 @@ struct xcounts_err_cat : std::error_category { return "xcounts error"; } auto message(const int condition) const -> std::string override { + // NOLINTBEGIN(*-avoid-magic-numbers) using std::string_literals::operator""s; switch (condition) { case 0: return "ok"s; @@ -73,6 +74,7 @@ struct xcounts_err_cat : std::error_category { case 7: return "failed to parse site"s; } std::abort(); // unreacheable + // NOLINTEND(*-avoid-magic-numbers) } }; // clang-format on @@ -90,6 +92,7 @@ make_error_code(xcounts_err e) { template static inline std::uint32_t fill_output_buffer(const std::uint32_t offset, const MSite &s, T &buf) { + // NOLINTBEGIN(*-pointer-arithmetic) auto buf_end = buf.data() + buf.size(); auto res = std::to_chars(buf.data(), buf_end, s.pos - offset); *res.ptr++ = '\t'; @@ -98,11 +101,15 @@ fill_output_buffer(const std::uint32_t offset, const MSite &s, T &buf) { res = std::to_chars(res.ptr, buf_end, s.n_unmeth()); *res.ptr++ = '\n'; return std::distance(buf.data(), res.ptr); + // NOLINTEND(*-pointer-arithmetic) } int main_xcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) try { + static constexpr auto output_buffer_size = 128; + static constexpr auto input_buffer_size = 1024; + // ADS: It might happen that a "chromosome" has no CpG sites (like // Scaffold113377 in strPur2). Therefore, we can't assume each chrom will // appear in the input when attempting to check consistency. @@ -197,11 +204,11 @@ main_xcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) // use the kstring_t type to more directly use the BGZF file kstring_t line{0, 0, nullptr}; - const int ret = ks_resize(&line, 1024); + const int ret = ks_resize(&line, input_buffer_size); if (ret) throw dnmt_error("failed to acquire buffer"); - std::vector buf(128); + std::vector buf(output_buffer_size); std::uint32_t offset = 0; std::string prev_chrom; @@ -222,6 +229,7 @@ main_xcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) continue; } + // NOLINTNEXTLINE(*-pointer-arithmetic) if (!site.initialize(line.s, line.s + line.l)) { ec = xcounts_err::failed_to_parse_site; break; @@ -284,4 +292,4 @@ main_xcounts(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) return EXIT_SUCCESS; } -// NOLINTEND(*-avoid-c-arrays,*-avoid-magic-numbers,*-avoid-non-const-global-variables,*-narrowing-conversions,*-constant-array-index,*-pointer-arithmetic) +// NOLINTEND(*-narrowing-conversions)