From 3d8bde60a1fdc7e05c3f09b53056de344937fcea Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 17 Nov 2020 11:29:10 -0500 Subject: [PATCH 1/7] fixes for differences in F77 BLAS/LAPACK API in MADNESS and rest of world. --- src/TiledArray/algebra/lapack/chol.h | 4 +- src/TiledArray/algebra/lapack/lapack.cc | 117 +++++++++++++++--------- 2 files changed, 74 insertions(+), 47 deletions(-) diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/chol.h index 0d3a6fbeed..44376bae9a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/chol.h @@ -24,9 +24,9 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED -#include #include #include +#include #include namespace TiledArray { @@ -45,7 +45,7 @@ auto make_L_eig(const DistArray& A) { World& world = A.world(); auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - chol_eig(A_eig); + lapack::cholesky(A_eig); } world.gop.broadcast_serializable(A_eig, 0); return A_eig; diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc index a2d6e91626..977d6292b4 100644 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ b/src/TiledArray/algebra/lapack/lapack.cc @@ -22,9 +22,9 @@ * */ -#include #include #include +#include #include #include @@ -43,8 +43,8 @@ namespace TiledArray::lapack { -template -void cholesky(Matrix &A) { +template +void cholesky(Matrix& A) { char uplo = 'L'; integer n = A.rows(); auto* a = A.data(); @@ -58,8 +58,8 @@ void cholesky(Matrix &A) { if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); } -template -void cholesky_linv(Matrix &A) { +template +void cholesky_linv(Matrix& A) { char uplo = 'L'; char diag = 'N'; integer n = A.rows(); @@ -70,8 +70,8 @@ void cholesky_linv(Matrix &A) { if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); } -template -void cholesky_solve(Matrix &A, Matrix &X) { +template +void cholesky_solve(Matrix& A, Matrix& X) { char uplo = 'L'; integer n = A.rows(); integer nrhs = X.cols(); @@ -80,16 +80,16 @@ void cholesky_solve(Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); } -template -void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { char uplo = 'L'; char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); char diag = 'N'; integer n = A.rows(); integer nrhs = X.cols(); @@ -98,12 +98,13 @@ void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { integer lda = n; integer ldb = n; integer info = 0; - //TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + // TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, + // &info); if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); } -template -void hereig(Matrix &A, Vector &W) { +template +void hereig(Matrix& A, Vector& W) { char jobz = 'V'; char uplo = 'L'; integer n = A.rows(); @@ -113,15 +114,27 @@ void hereig(Matrix &A, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, + &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, + sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, + &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig failed"); } -template -void hereig_gen(Matrix &A, Matrix &B, Vector &W) { +template +void hereig_gen(Matrix& A, Matrix& B, Vector& W) { integer itype = 1; char jobz = 'V'; char uplo = 'L'; @@ -134,21 +147,33 @@ void hereig_gen(Matrix &A, Matrix &B, Vector &W) { integer lwork = -1; integer info; T lwork_dummy; - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, + work.data(), &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } -template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { +template +void svd(Matrix& A, Vector& S, Matrix* U, Matrix* VT) { integer m = A.rows(); integer n = A.cols(); T* a = A.data(); integer lda = A.rows(); - S.resize(std::max(m,n)); + S.resize(std::max(m, n)); T* s = S.data(); char jobu = 'N'; @@ -156,7 +181,7 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer ldu = 0; if (U) { jobu = 'A'; - U->resize(m,n); + U->resize(m, n); u = U->data(); ldu = U->rows(); } @@ -166,7 +191,7 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer ldvt = 0; if (VT) { jobvt = 'A'; - VT->resize(n,m); + VT->resize(n, m); vt = VT->data(); ldvt = VT->rows(); } @@ -175,32 +200,34 @@ void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { integer info; T lwork_dummy; - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif lwork = integer(lwork_dummy); Vector work(lwork); - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); +#ifndef MADNESS_LINALG_USE_LAPACKE + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); +#else + TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, + &lwork_dummy, &lwork, &info); +#endif if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); } - -#define TA_LAPACK_EXPLICIT(MATRIX,VECTOR) \ +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ template void cholesky(MATRIX&); \ template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&,MATRIX&); \ - template void cholesky_lsolve(TransposeFlag,MATRIX&,MATRIX&); \ - template void hereig(MATRIX&,VECTOR&); \ - template void hereig_gen(MATRIX&,MATRIX&,VECTOR&); \ - template void svd(MATRIX&,VECTOR&,MATRIX*,MATRIX*); - + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void hereig(MATRIX&, VECTOR&); \ + template void hereig_gen(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); -} +} // namespace TiledArray::lapack From 76c0d0520fc139f0727977d073c527cc2276378a Mon Sep 17 00:00:00 2001 From: asadchev Date: Tue, 17 Nov 2020 14:15:17 -0500 Subject: [PATCH 2/7] LAPACK algebra backend --- src/CMakeLists.txt | 2 +- src/TiledArray/algebra/chol.h | 12 +- src/TiledArray/algebra/heig.h | 20 +- src/TiledArray/algebra/lapack/heig.h | 119 +++------- src/TiledArray/algebra/lapack/lapack.cc | 233 ------------------- src/TiledArray/algebra/lapack/lapack.cpp | 282 +++++++++++++++++++++++ src/TiledArray/algebra/lapack/lapack.h | 25 +- src/TiledArray/algebra/lapack/lu.h | 70 ++++++ src/TiledArray/algebra/lapack/svd.h | 103 +++++++++ src/TiledArray/algebra/lu.h | 3 +- src/TiledArray/algebra/svd.h | 15 +- tests/lapack.cpp | 24 +- 12 files changed, 566 insertions(+), 342 deletions(-) delete mode 100644 src/TiledArray/algebra/lapack/lapack.cc create mode 100644 src/TiledArray/algebra/lapack/lapack.cpp create mode 100644 src/TiledArray/algebra/lapack/lu.h create mode 100644 src/TiledArray/algebra/lapack/svd.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a6ca49265..2d89f1c4be 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -207,7 +207,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp -TiledArray/algebra/lapack/lapack.cc +TiledArray/algebra/lapack/lapack.cpp ) # the list of libraries on which TiledArray depends on, will be cached later diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/chol.h index 222f80806d..06eb98fb19 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/chol.h @@ -37,9 +37,8 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky(A, l_trange); - else #endif - return lapack::cholesky(A, l_trange); + return lapack::cholesky(A, l_trange); } template @@ -47,9 +46,8 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_linv(A, l_trange); - else #endif - return lapack::cholesky_linv(A, l_trange); + return lapack::cholesky_linv(A, l_trange); } template @@ -58,9 +56,8 @@ auto cholesky_solve(const Array& A, const Array& B, #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_solve(A, B, x_trange); - else #endif - return lapack::cholesky_solve(A, B, x_trange); + return lapack::cholesky_solve(A, B, x_trange); } template @@ -71,9 +68,8 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); - else #endif - return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); + return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); } } // namespace TiledArray diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h index 1a3816da5f..68e39c9de9 100644 --- a/src/TiledArray/algebra/heig.h +++ b/src/TiledArray/algebra/heig.h @@ -27,16 +27,30 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -// eigen #endif +#include namespace TiledArray { +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK -using scalapack::heig; + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, evec_trange); + } #endif + return lapack::heig(A, evec_trange); +} + +template +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, B, evec_trange); + } #endif + return lapack::heig(A, B, evec_trange); +} } // namespace TiledArray diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h index 4d779a176e..5fe4270c8f 100644 --- a/src/TiledArray/algebra/lapack/heig.h +++ b/src/TiledArray/algebra/lapack/heig.h @@ -24,15 +24,16 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED -#include #include + +#include +#include #include -namespace TiledArray { -namespace lapack { +namespace TiledArray::lapack { /** - * @brief Solve the standard eigenvalue problem with ScaLAPACK + * @brief Solve the standard eigenvalue problem with LAPACK * * A(i,k) X(k,j) = X(i,j) E(j) * @@ -51,80 +52,24 @@ namespace lapack { */ template auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { - using scalar_type = typename Array::scalar_type; - using numeric_type = typename Array::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - + using numeric_type = typename lapack::array_traits::numeric_type; World& world = A.world(); auto A_eig = detail::to_eigen(A); - std::vector evals; -// if (world.rank() == 0) { -// char jobz = 'V'; -// char uplo = 'L'; -// integer n = A_eig.rows(); -// numeric_type* a = A_eig.data(); -// integer lda = n; -// integer info = 0; -// evals.resize(n); -// integer lwork = -1; -// std::vector work(1); -// // run once to query, then to compute -// while (lwork != static_cast(work.size())) { -// if (lwork > 0) { -// work.resize(lwork); -// } -// if constexpr (is_real) { -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, &info, -// sizeof(char), sizeof(char)); -// #endif -// } else { -// std::vector rwork; -// if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, -// &rwork.data(), &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN( -// heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, -// &rwork.data(), &info, sizeof(char), sizeof(char)); -// #endif -// } -// if (lwork == -1) { -// if constexpr (is_real) { -// lwork = static_cast(work[0]); -// } else { -// lwork = static_cast(work[0].real()); -// } -// TA_ASSERT(lwork > 1); -// } -// }; - -// if (info != 0) { -// if (is_real) -// TA_EXCEPTION("LAPACK::syev failed"); -// else -// TA_EXCEPTION("LAPACK::heev failed"); -// } -// } - + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, evals); + } world.gop.broadcast_serializable(A_eig, 0); world.gop.broadcast_serializable(evals, 0); if (evec_trange.rank() == 0) evec_trange = A.trange(); - return std::tuple(evals, - eigen_to_array(A.world(), evec_trange, A_eig)); + return std::tuple( + evals, + eigen_to_array(world, evec_trange, A_eig) + ); } /** - * @brief Solve the generalized eigenvalue problem with ScaLAPACK + * @brief Solve the generalized eigenvalue problem with LAPACK * * A(i,k) X(k,j) = B(i,k) X(k,j) E(j) * @@ -142,26 +87,30 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { * @param[in] B Positive-definite matrix * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() - * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -auto heig(const ArrayA& A, const ArrayB& B, - TiledRange evec_trange = TiledRange()) { - using scalar_type = typename ArrayA::scalar_type; - using numeric_type = typename ArrayA::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - - abort(); - return std::tuple(std::vector{}, EVecType{}); +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { + using numeric_type = typename lapack::array_traits::numeric_type; + (void)lapack::array_traits{}; + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, B_eig, evals); + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple( + evals, + eigen_to_array(A.world(), evec_trange, A_eig) + ); } -} // namespace lapack -} // namespace TiledArray +} // namespace TiledArray::lapack -#endif // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc deleted file mode 100644 index 977d6292b4..0000000000 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ /dev/null @@ -1,233 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Eduard Valeyev - * - * chol.h - * Created: 16 October, 2020 - * - */ - -#include -#include -#include -#include -#include - -#define TA_LAPACK_CALL(name, args...) \ - typedef T numeric_type; \ - if constexpr (std::is_same_v) \ - d##name##_(args); \ - else if constexpr (std::is_same_v) \ - s##name##_(args); \ - else if constexpr (std::is_same_v>) \ - z##name##_(args); \ - else if constexpr (std::is_same_v>) \ - c##name##_(args); \ - else \ - std::abort(); - -namespace TiledArray::lapack { - -template -void cholesky(Matrix& A) { - char uplo = 'L'; - integer n = A.rows(); - auto* a = A.data(); - integer lda = n; - integer info = 0; -#if defined(MADNESS_LINALG_USE_LAPACKE) - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info); -#else - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); -#endif - if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); -} - -template -void cholesky_linv(Matrix& A) { - char uplo = 'L'; - char diag = 'N'; - integer n = A.rows(); - auto* l = A.data(); - integer lda = n; - integer info = 0; - TA_LAPACK_CALL(trtri, &uplo, &diag, &n, l, &lda, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); -} - -template -void cholesky_solve(Matrix& A, Matrix& X) { - char uplo = 'L'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - // TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); -} - -template -void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { - char uplo = 'L'; - char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); - char diag = 'N'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - // TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, - // &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); -} - -template -void hereig(Matrix& A, Vector& W) { - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, - &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, - &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, - sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, - &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig failed"); -} - -template -void hereig_gen(Matrix& A, Matrix& B, Vector& W) { - integer itype = 1; - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* b = B.data(); - integer ldb = B.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - &lwork_dummy, &lwork, &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - work.data(), &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, - work.data(), &lwork, &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - -template -void svd(Matrix& A, Vector& S, Matrix* U, Matrix* VT) { - integer m = A.rows(); - integer n = A.cols(); - T* a = A.data(); - integer lda = A.rows(); - - S.resize(std::max(m, n)); - T* s = S.data(); - - char jobu = 'N'; - T* u = nullptr; - integer ldu = 0; - if (U) { - jobu = 'A'; - U->resize(m, n); - u = U->data(); - ldu = U->rows(); - } - - char jobvt = 'N'; - T* vt = nullptr; - integer ldvt = 0; - if (VT) { - jobvt = 'A'; - VT->resize(n, m); - vt = VT->data(); - ldvt = VT->rows(); - } - - integer lwork = -1; - integer info; - T lwork_dummy; - -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info); -#endif - lwork = integer(lwork_dummy); - Vector work(lwork); -#ifndef MADNESS_LINALG_USE_LAPACKE - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char)); -#else - TA_LAPACK_CALL(gesvd, &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, - &lwork_dummy, &lwork, &info); -#endif - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - -#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ - template void cholesky(MATRIX&); \ - template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&, MATRIX&); \ - template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ - template void hereig(MATRIX&, VECTOR&); \ - template void hereig_gen(MATRIX&, MATRIX&, VECTOR&); \ - template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); - -TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); - -} // namespace TiledArray::lapack diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp new file mode 100644 index 0000000000..85867e193d --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -0,0 +1,282 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * chol.h + * Created: 16 October, 2020 + * + */ + +#include +#include +#include +#include +#include + +#define TA_LAPACK_CALL(name, args...) \ + typedef T numeric_type; \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +#define TA_LAPACK_GESV(...) TA_LAPACK_CALL(gesv, __VA_ARGS__) +#define TA_LAPACK_GETRF(...) TA_LAPACK_CALL(getrf, __VA_ARGS__) +#define TA_LAPACK_GETRI(...) TA_LAPACK_CALL(getri, __VA_ARGS__) + +#ifdef MADNESS_LINALG_USE_LAPACKE + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__) + +#else + +#ifdef FORTRAN_LINKAGE_LCU +#define dtrtri dtrtri_ +#define dtrtrs dtrtrs_ +#define dposv dposv_ +#endif + +extern "C" { // these arent in madness/clapack_fortran.h +void dtrtri(const char* uplo, const char* diag, + const integer* n, + const real8* a, const integer* lda, + integer *info, + char_len, char_len); +void dtrtrs(const char* uplo, const char* trans, const char* diag, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len, char_len, char_len); +void dposv(const char* uplo, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len); +} + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__, sizeof(char), sizeof(char), sizeof(char)) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__, sizeof(char), sizeof(char)) + +#endif // MADNESS_LINALG_USE_LAPACKE + +namespace TiledArray::lapack { + +template +void cholesky(Matrix& A) { + char uplo = 'L'; + integer n = A.rows(); + auto* a = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_POTRF(&uplo, &n, a, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky failed"); +} + +template +void cholesky_linv(Matrix& A) { + char uplo = 'L'; + char diag = 'N'; + integer n = A.rows(); + auto* l = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_TRTRI(&uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_linv failed"); +} + +template +void cholesky_solve(Matrix& A, Matrix& X) { + char uplo = 'L'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_POSV(&uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_solve failed"); +} + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + char diag = 'N'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_TRTRS(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_lsolve failed"); +} + +template +void heig(Matrix& A, std::vector& W) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void heig(Matrix& A, Matrix& B, std::vector& W) { + integer itype = 1; + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void svd(Matrix& A, std::vector& S, Matrix* U, Matrix* VT) { + integer m = A.rows(); + integer n = A.cols(); + T* a = A.data(); + integer lda = A.rows(); + + S.resize(std::min(m, n)); + T* s = S.data(); + + char jobu = 'N'; + T* u = nullptr; + integer ldu = m; + if (U) { + jobu = 'A'; + U->resize(m, n); + u = U->data(); + ldu = U->rows(); + } + + char jobvt = 'N'; + T* vt = nullptr; + integer ldvt = n; + if (VT) { + jobvt = 'A'; + VT->resize(n, m); + vt = VT->data(); + ldvt = VT->rows(); + } + + integer lwork = -1; + integer info; + T lwork_dummy; + + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::svd failed"); +} + +template +void lu_solve(Matrix &A, Matrix &B) { + integer n = A.rows(); + integer nrhs = B.cols(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + std::vector ipiv(n); + integer info; + TA_LAPACK_GESV(&n, &nrhs, a, &lda, ipiv.data(), b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_solve failed"); +} + +template +void lu_inv(Matrix &A) { + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + integer lwork = -1; + std::vector work(1); + std::vector ipiv(n); + integer info; + TA_LAPACK_GETRF(&n, &n, a, &lda, ipiv.data(), &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + lwork = (integer)work[0]; + work.resize(lwork); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); +} + + +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ + template void cholesky(MATRIX&); \ + template void cholesky_linv(MATRIX&); \ + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void heig(MATRIX&, VECTOR&); \ + template void heig(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ + template void lu_solve(MATRIX&, MATRIX&); \ + template void lu_inv(MATRIX&); + +TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); +//TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); + +} // namespace TiledArray::lapack diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h index 1ccf68719f..78c767bfc5 100644 --- a/src/TiledArray/algebra/lapack/lapack.h +++ b/src/TiledArray/algebra/lapack/lapack.h @@ -30,8 +30,16 @@ namespace TiledArray::lapack { -template -using Vector = Eigen::Matrix; +template +struct array_traits { + using scalar_type = typename A::scalar_type; + using numeric_type = typename A::numeric_type; + static const bool complex = !std::is_same_v; + static_assert( + std::is_same_v, + "TA::lapack is only usable with a DistArray of scalar types" + ); +}; template using Matrix = Eigen::Matrix; @@ -49,10 +57,19 @@ template void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X); template -void hereig(Matrix &A, Vector &W); +void heig(Matrix &A, std::vector &W); + +template +void heig(Matrix &A, Matrix &B, std::vector &W); + +template +void svd(Matrix &A, std::vector &S, Matrix *U, Matrix *VT); + +template +void lu_solve(Matrix &A, Matrix &B); template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT); +void lu_inv(Matrix &A); } diff --git a/src/TiledArray/algebra/lapack/lu.h b/src/TiledArray/algebra/lapack/lu.h new file mode 100644 index 0000000000..389af28942 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lu.h @@ -0,0 +1,70 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * lu.h + * Created: 19 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Solve a linear system via LU factorization + */ +template +auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) { + (void)lapack::array_traits{}; + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + if (world.rank() == 0) { + lapack::lu_solve(A_eig, B_eig); + } + world.gop.broadcast_serializable(B_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, B_eig); +} + +/** + * @brief Invert a matrix via LU + */ +template +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) { + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + if (world.rank() == 0) { + lapack::lu_inv(A_eig); + } + world.gop.broadcast_serializable(A_eig, 0); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); + return eigen_to_array(A.world(), ainv_trange, A_eig); +} + +} // namespace TiledArray::lapack + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/svd.h b/src/TiledArray/algebra/lapack/svd.h new file mode 100644 index 0000000000..8ea0afd3da --- /dev/null +++ b/src/TiledArray/algebra/lapack/svd.h @@ -0,0 +1,103 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * + * A(i,j) = S(k) U(i,k) conj(V(j,k)) + * + * Example Usage: + * + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { + + using T = typename Array::numeric_type; + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + + constexpr bool svd_all_vectors = std::is_same_v; + constexpr bool need_u = std::is_same_v or svd_all_vectors; + constexpr bool need_vt = std::is_same_v or svd_all_vectors; + + std::vector S; + std::unique_ptr< Matrix > U, VT; + + if constexpr (need_u) U = std::make_unique< Matrix >(); + if constexpr (need_vt) VT = std::make_unique< Matrix >(); + + if (world.rank() == 0) { + lapack::svd(A_eig, S, U.get(), VT.get()); + } + + world.gop.broadcast_serializable(S, 0); + if (U) world.gop.broadcast_serializable(*U, 0); + if (VT) world.gop.broadcast_serializable(*VT, 0); + + auto make_array = [&world](auto && ... args) { + return eigen_to_array(world, args...); + }; + + if constexpr (need_u && need_vt) { + return std::tuple(S, make_array(u_trange, *U), make_array(vt_trange, *VT)); + } + if constexpr (need_u && !need_vt) { + return std::tuple(S, make_array(u_trange, *U)); + } + if constexpr (!need_u && need_vt) { + return std::tuple(S, make_array(vt_trange, *VT)); + } + + if constexpr (!need_u && !need_vt) return S; + +} + +} // namespace scalapack::TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h index fa1bfca49e..330f49c8f7 100644 --- a/src/TiledArray/algebra/lu.h +++ b/src/TiledArray/algebra/lu.h @@ -27,9 +27,8 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -#// include eigen #endif +#include namespace TiledArray { diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h index b31c7932da..3c87fbc087 100644 --- a/src/TiledArray/algebra/svd.h +++ b/src/TiledArray/algebra/svd.h @@ -27,16 +27,21 @@ #include #ifdef TILEDARRAY_HAS_SCALAPACK #include -#else -// include eigen #endif // TILEDARRAY_HAS_SCALAPACK +#include namespace TiledArray { -#ifdef TILEDARRAY_HAS_SCALAPACK -using scalapack::svd; -#else +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::svd(A, u_trange, vt_trange); + } #endif + return lapack::svd(A, u_trange, vt_trange); +} } // namespace TiledArray diff --git a/tests/lapack.cpp b/tests/lapack.cpp index 287acab547..5666be717d 100644 --- a/tests/lapack.cpp +++ b/tests/lapack.cpp @@ -6,6 +6,8 @@ #include "TiledArray/algebra/lapack/chol.h" #include "TiledArray/algebra/lapack/heig.h" +#include "TiledArray/algebra/lapack/svd.h" +#include "TiledArray/algebra/lapack/lu.h" using namespace TiledArray::lapack; @@ -68,7 +70,7 @@ BOOST_AUTO_TEST_CASE(chol) { TA::Tensor A; this->make_ta_reference(A, range); - auto L = cholesky(A); + auto L = lapack::cholesky(A); decltype(A) A_minus_LLt; A_minus_LLt = A.clone(); @@ -80,4 +82,24 @@ BOOST_AUTO_TEST_CASE(chol) { N * N * std::numeric_limits::epsilon()); } +BOOST_AUTO_TEST_CASE(svd) { + TA::TArray A; + { auto S = lapack::svd(A); } + { auto [S,U] = lapack::svd(A); } + { auto [S,VT] = lapack::svd(A); } + { auto [S,U,VT] = lapack::svd(A); } +} + +BOOST_AUTO_TEST_CASE(heig) { + TA::TArray A; + { auto W = lapack::heig(A); } + { auto W = lapack::heig(A,A); } +} + +BOOST_AUTO_TEST_CASE(lu) { + TA::TArray A; + { auto W = lapack::lu_solve(A,A); } + { auto W = lapack::lu_inv(A); } +} + BOOST_AUTO_TEST_SUITE_END() From acefee4bae05b6b025e767d80ada804bae5525ee Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 20:31:05 -0500 Subject: [PATCH 3/7] Rename chol.h to cholesky.h --- src/CMakeLists.txt | 8 +++++--- src/TiledArray/algebra/{chol.h => cholesky.h} | 4 ++-- src/TiledArray/algebra/lapack/{chol.h => cholesky.h} | 0 src/TiledArray/algebra/lapack/lapack.cpp | 2 +- src/TiledArray/algebra/scalapack/all.h | 2 +- src/TiledArray/algebra/scalapack/{chol.h => cholesky.h} | 0 6 files changed, 9 insertions(+), 7 deletions(-) rename src/TiledArray/algebra/{chol.h => cholesky.h} (96%) rename src/TiledArray/algebra/lapack/{chol.h => cholesky.h} (100%) rename src/TiledArray/algebra/scalapack/{chol.h => cholesky.h} (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d89f1c4be..ddab08d0a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,15 +60,17 @@ TiledArray/zero_tensor.h TiledArray/algebra/conjgrad.h TiledArray/algebra/diis.h TiledArray/algebra/utils.h -TiledArray/algebra/chol.h +TiledArray/algebra/cholesky.h TiledArray/algebra/heig.h TiledArray/algebra/lu.h TiledArray/algebra/svd.h TiledArray/algebra/types.h -TiledArray/algebra/lapack/chol.h +TiledArray/algebra/lapack/cholesky.h TiledArray/algebra/lapack/heig.h TiledArray/algebra/lapack/util.h -TiledArray/algebra/scalapack/chol.h +TiledArray/algebra/lapack/lu.h +TiledArray/algebra/lapack/svd.h +TiledArray/algebra/scalapack/cholesky.h TiledArray/algebra/scalapack/heig.h TiledArray/algebra/scalapack/lu.h TiledArray/algebra/scalapack/svd.h diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/cholesky.h similarity index 96% rename from src/TiledArray/algebra/chol.h rename to src/TiledArray/algebra/cholesky.h index 06eb98fb19..b34bf38292 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/cholesky.h @@ -26,9 +26,9 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #endif -#include +#include namespace TiledArray { diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/cholesky.h similarity index 100% rename from src/TiledArray/algebra/lapack/chol.h rename to src/TiledArray/algebra/lapack/cholesky.h diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp index 85867e193d..640ef695f1 100644 --- a/src/TiledArray/algebra/lapack/lapack.cpp +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -17,7 +17,7 @@ * * Eduard Valeyev * - * chol.h + * cholesky.h * Created: 16 October, 2020 * */ diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h index 2599d0280e..fbba6826d0 100644 --- a/src/TiledArray/algebra/scalapack/all.h +++ b/src/TiledArray/algebra/scalapack/all.h @@ -28,7 +28,7 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #include #include #include diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/cholesky.h similarity index 100% rename from src/TiledArray/algebra/scalapack/chol.h rename to src/TiledArray/algebra/scalapack/cholesky.h From abe7576a5b4a379c527c25a60be94d8b59084950 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 16:12:07 -0500 Subject: [PATCH 4/7] Refactor linalg unit tests --- tests/CMakeLists.txt | 8 +- tests/lapack.cpp | 105 --------- tests/{scalapack.cpp => linear_algebra.cpp} | 234 ++++++++++---------- 3 files changed, 124 insertions(+), 223 deletions(-) delete mode 100644 tests/lapack.cpp rename tests/{scalapack.cpp => linear_algebra.cpp} (81%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9004369a5c..75889908de 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -111,16 +111,16 @@ set(ta_test_src_files ta_test.cpp # t_tot_tot_contract_.cpp # tot_tot_tot_contract_.cpp # einsum.cpp - lapack.cpp + linear_algebra.cpp ) if(CUDA_FOUND) list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() -if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files scalapack.cpp) -endif(TARGET TiledArray_SCALAPACK) +# if (TARGET TiledArray_SCALAPACK) +# list(APPEND ta_test_src_files scalapack.cpp) +# endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE) diff --git a/tests/lapack.cpp b/tests/lapack.cpp deleted file mode 100644 index 5666be717d..0000000000 --- a/tests/lapack.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include -#include -#include "TiledArray/config.h" -#include "range_fixture.h" -#include "unit_test_config.h" - -#include "TiledArray/algebra/lapack/chol.h" -#include "TiledArray/algebra/lapack/heig.h" -#include "TiledArray/algebra/lapack/svd.h" -#include "TiledArray/algebra/lapack/lu.h" - -using namespace TiledArray::lapack; - -struct LAPACKFixture { - int64_t N; - std::vector htoeplitz_vector; - std::vector exact_evals; - - inline double matrix_element_generator(int64_t i, int64_t j) { - // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i - j)]; - } - - inline double make_ta_reference(TA::Tensor& t, - TA::Range const& range) { - t = TA::Tensor(range, 0.0); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) { - for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m, n); - } - } - - return t.norm(); - }; - - LAPACKFixture(int64_t N) : N(N), htoeplitz_vector(N), exact_evals(N) { - // Generate an hermitian Circulant vector - std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); - htoeplitz_vector[0] = 100; - std::default_random_engine gen(0); - std::uniform_real_distribution<> dist(0., 1.); - for (int64_t i = 1; i <= (N / 2); ++i) { - double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N - i] = val; - } - - // Compute exact eigenvalues - const double ff = 2. * M_PI / N; - for (int64_t j = 0; j < N; ++j) { - double val = htoeplitz_vector[0]; - for (int64_t k = 1; k < N; ++k) - val += htoeplitz_vector[N - k] * std::cos(ff * j * k); - exact_evals[j] = val; - } - - std::sort(exact_evals.begin(), exact_evals.end()); - } - - LAPACKFixture() : LAPACKFixture(1000) {} -}; - -BOOST_FIXTURE_TEST_SUITE(lapack_suite, LAPACKFixture) - -BOOST_AUTO_TEST_CASE(chol) { - auto range = TA::Range{N, N}; - - TA::Tensor A; - this->make_ta_reference(A, range); - - auto L = lapack::cholesky(A); - - decltype(A) A_minus_LLt; - A_minus_LLt = A.clone(); - A_minus_LLt.gemm(L, L, -1, - math::GemmHelper{madness::cblas::NoTrans, - madness::cblas::ConjTrans, 2, 2, 2}); - - BOOST_CHECK_SMALL(A_minus_LLt.norm(), - N * N * std::numeric_limits::epsilon()); -} - -BOOST_AUTO_TEST_CASE(svd) { - TA::TArray A; - { auto S = lapack::svd(A); } - { auto [S,U] = lapack::svd(A); } - { auto [S,VT] = lapack::svd(A); } - { auto [S,U,VT] = lapack::svd(A); } -} - -BOOST_AUTO_TEST_CASE(heig) { - TA::TArray A; - { auto W = lapack::heig(A); } - { auto W = lapack::heig(A,A); } -} - -BOOST_AUTO_TEST_CASE(lu) { - TA::TArray A; - { auto W = lapack::lu_solve(A,A); } - { auto W = lapack::lu_inv(A); } -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/scalapack.cpp b/tests/linear_algebra.cpp similarity index 81% rename from tests/scalapack.cpp rename to tests/linear_algebra.cpp index f728938a25..94d8643759 100644 --- a/tests/scalapack.cpp +++ b/tests/linear_algebra.cpp @@ -1,19 +1,36 @@ #include #include #include "TiledArray/config.h" -#include "range_fixture.h" +//#include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/algebra/lapack/chol.h" +#include "TiledArray/algebra/lapack/cholesky.h" #include "TiledArray/algebra/lapack/heig.h" -#include "TiledArray/algebra/scalapack/all.h" +#include "TiledArray/algebra/lapack/lu.h" +#include "TiledArray/algebra/lapack/svd.h" -using namespace TiledArray::scalapack; +#include "TiledArray/algebra/cholesky.h" +#include "TiledArray/algebra/heig.h" +#include "TiledArray/algebra/lu.h" +#include "TiledArray/algebra/svd.h" -struct ScaLAPACKFixture { - blacspp::Grid grid; - BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? +namespace TA = TiledArray; +namespace lapack = TA::lapack; + +#if TILEDARRAY_HAS_SCALAPACK +#include "TiledArray/algebra/scalapack/all.h" +namespace scalapack = TA::scalapack; +#define TILEDARRAY_SCALAPACK_TEST(F,E) \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + compare("TiledArray", lapack::F, TiledArray::F, E); +#else +#define TILEDARRAY_SCALAPACK_TEST(...) +#endif + +struct ReferenceFixture { + + size_t N; std::vector htoeplitz_vector; std::vector exact_evals; @@ -27,8 +44,7 @@ struct ScaLAPACKFixture { #endif } - inline double make_ta_reference(TA::Tensor& t, - TA::Range const& range) { + inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); @@ -41,21 +57,9 @@ struct ScaLAPACKFixture { return t.norm(); }; - inline void construct_scalapack(BlockCyclicMatrix& A) { - auto [M, N] = A.dims(); - for (size_t i = 0; i < M; ++i) - for (size_t j = 0; j < N; ++j) - if (A.dist().i_own(i, j)) { - auto [i_local, j_local] = A.dist().local_indx(i, j); - A.local_mat()(i_local, j_local) = matrix_element_generator(i, j); - } - } - - ScaLAPACKFixture(int64_t N, int64_t NB) - : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB), - htoeplitz_vector(N), - exact_evals(N) { + ReferenceFixture(int64_t N = 1000) + : N(N), htoeplitz_vector(N), exact_evals(N) + { // Generate an hermitian Circulant vector std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; @@ -79,11 +83,39 @@ struct ScaLAPACKFixture { std::sort(exact_evals.begin(), exact_evals.end()); - // Fill reference matrix - construct_scalapack(ref_matrix); } - ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {} +}; + +struct LinearAlgebraFixture : ReferenceFixture { + +#if TILEDARRAY_HAS_SCALAPACK + + blacspp::Grid grid; + scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? + + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : + ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) + { + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + if (ref_matrix.dist().i_own(i, j)) { + auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); + ref_matrix.local_mat()(i_local, j_local) = matrix_element_generator(i, j); + } + } + } + } + template + static void compare(const char *context, const A& lapack, const A& result, double e) { + BOOST_TEST_CONTEXT(context); + auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); + BOOST_CHECK_SMALL(diff_with_lapack, e); + } +#endif + }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -108,7 +140,10 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; -BOOST_FIXTURE_TEST_SUITE(scalapack_suite, ScaLAPACKFixture) + +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) + +#if TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -127,7 +162,7 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -155,7 +190,7 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -183,7 +218,7 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -218,7 +253,7 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -246,7 +281,7 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -282,7 +317,7 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { GlobalFixture::world->gop.fence(); auto test_ta = - block_cyclic_to_array>(ref_matrix, trange); + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -310,7 +345,7 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -345,7 +380,7 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -363,10 +398,10 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); }; -BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { +#endif // TILEDARRAY_HAS_SCALAPACK + +BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -376,7 +411,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { return this->make_ta_reference(t, range); }); - auto [evals, evecs] = heig(ref_ta); + auto [evals, evecs] = lapack::heig(ref_ta); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta); // auto evals = heig( ref_ta ); @@ -399,10 +434,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { +BOOST_AUTO_TEST_CASE(heig_diff_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); auto ref_ta = TA::make_array>( @@ -412,7 +445,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig(ref_ta, new_trange, 128); + auto [evals, evecs] = lapack::heig(ref_ta, new_trange); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange); BOOST_CHECK(evecs.trange() == new_trange); @@ -434,10 +467,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_generalized) { +BOOST_AUTO_TEST_CASE(heig_generalized) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -461,7 +492,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { }); GlobalFixture::world->gop.fence(); - auto [evals, evecs] = heig(ref_ta, dense_iden); + auto [evals, evecs] = lapack::heig(ref_ta, dense_iden); // auto evals = heig( ref_ta ); BOOST_CHECK(evecs.trange() == ref_ta.trange()); @@ -476,10 +507,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol) { +BOOST_AUTO_TEST_CASE(cholesky) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -489,7 +518,7 @@ BOOST_AUTO_TEST_CASE(sca_chol) { return this->make_ta_reference(t, range); }); - auto L = cholesky(A); + auto L = lapack::cholesky(A); BOOST_CHECK(L.trange() == A.trange()); @@ -510,10 +539,8 @@ BOOST_AUTO_TEST_CASE(sca_chol) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv) { +BOOST_AUTO_TEST_CASE(cholesky_linv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -523,8 +550,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { return this->make_ta_reference(t, range); }); - auto Linv = cholesky_linv(A); - auto Linv_lapack = lapack::cholesky_linv(A); + auto Linv = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); @@ -543,22 +569,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = A("i,j").norm().get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { +BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -568,8 +590,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { return this->make_ta_reference(t, range); }); - auto [L, Linv] = cholesky_linv(A); - auto [L_lapack, Linv_lapack] = lapack::cholesky_linv(A); + auto [L, Linv] = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); @@ -588,26 +609,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(L) L_error; - L_error("i,j") = L("i,j") - L_lapack("i,j"); - BOOST_CHECK_SMALL(L_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_solve) { +BOOST_AUTO_TEST_CASE(cholesky_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -617,7 +630,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { return this->make_ta_reference(t, range); }); - auto iden = cholesky_solve(A, A); + auto iden = lapack::cholesky_solve(A, A); BOOST_CHECK(iden.trange() == A.trange()); auto iden_lapack = lapack::cholesky_solve(A, A); @@ -643,10 +656,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { +BOOST_AUTO_TEST_CASE(cholesky_lsolve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -657,13 +668,12 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { }); // Should produce X = L**H - auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + auto [L, X] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); BOOST_CHECK(X.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); // first, test against LAPACK - auto [L_lapack, X_lapack] = - lapack::cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + auto [L_lapack, X_lapack] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); decltype(L) L_error; L_error("i,j") = L("i,j") - L_lapack("i,j"); BOOST_CHECK_SMALL(L_error("i,j").norm().get(), @@ -681,10 +691,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_lu_solve) { +BOOST_AUTO_TEST_CASE(lu_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -694,7 +702,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { return this->make_ta_reference(t, range); }); - auto iden = lu_solve(ref_ta, ref_ta); + auto iden = lapack::lu_solve(ref_ta, ref_ta); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -709,16 +717,18 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); + } -BOOST_AUTO_TEST_CASE(sca_lu_inv) { +BOOST_AUTO_TEST_CASE(lu_inv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -730,7 +740,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { TA::TArray iden(*GlobalFixture::world, trange); - auto Ainv = lu_inv(ref_ta); + auto Ainv = lapack::lu_inv(ref_ta); iden("i,j") = Ainv("i,k") * ref_ta("k,j"); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -746,17 +756,19 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); + } #if 1 -BOOST_AUTO_TEST_CASE(sca_svd_values_only) { +BOOST_AUTO_TEST_CASE(svd_values_only) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -766,7 +778,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { return this->make_ta_reference(t, range); }); - auto S = svd(ref_ta, trange, trange); + auto S = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -779,10 +791,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { +BOOST_AUTO_TEST_CASE(svd_leftvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -792,7 +802,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { return this->make_ta_reference(t, range); }); - auto [S, U] = svd(ref_ta, trange, trange); + auto [S, U] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -805,10 +815,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { +BOOST_AUTO_TEST_CASE(svd_rightvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -818,7 +826,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { return this->make_ta_reference(t, range); }); - auto [S, VT] = svd(ref_ta, trange, trange); + auto [S, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -831,10 +839,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { +BOOST_AUTO_TEST_CASE(svd_allvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -844,7 +850,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { return this->make_ta_reference(t, range); }); - auto [S, U, VT] = svd(ref_ta, trange, trange); + auto [S, U, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), From cfa3a93226a26b740608d3c1fc1f3d90f6a9f335 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 21:39:18 -0500 Subject: [PATCH 5/7] Set BLA_STATIC=OFF by default --- CMakeLists.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c0eaa7fa3..a8d770603f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,9 +59,9 @@ endif(TILEDARRAY_PRERELEASE_ID) # Set install paths ============================================================ -set(TILEDARRAY_INSTALL_BINDIR "bin" +set(TILEDARRAY_INSTALL_BINDIR "bin" CACHE PATH "TiledArray binary install directory") -set(TILEDARRAY_INSTALL_INCLUDEDIR "include" +set(TILEDARRAY_INSTALL_INCLUDEDIR "include" CACHE PATH "TiledArray INCLUDE install directory") set(TILEDARRAY_INSTALL_LIBDIR "lib" CACHE PATH "TiledArray LIB install directory") @@ -162,16 +162,15 @@ else () endif() redefaultable_option(CMAKE_POSITION_INDEPENDENT_CODE "Default value for POSITION_INDEPENDENT_CODE of targets" ${default_CMAKE_POSITION_INDEPENDENT_CODE}) +set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") if(BUILD_SHARED_LIBS) - set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH TRUE) else() - set(BLA_STATIC TRUE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH FALSE) endif() # miscellaneous cmake platform-neutral and platform-specific configuration ============================= -set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX +set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX set(CMAKE_SKIP_RPATH FALSE) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_SKIP_INSTALL_RPATH FALSE) @@ -394,19 +393,19 @@ export(EXPORT tiledarray configure_package_config_file(cmake/tiledarray-config.cmake.in "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" INSTALL_DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" - PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR + PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR TILEDARRAY_INSTALL_INCLUDEDIR TILEDARRAY_INSTALL_LIBDIR TILEDARRAY_INSTALL_DOCDIR TILEDARRAY_INSTALL_CMAKEDIR) # Install config, version, and target files install(EXPORT tiledarray FILE "tiledarray-targets.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) install(FILES "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" "${PROJECT_BINARY_DIR}/tiledarray-config-version.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) From 217be9aeb53e686ec9b675e3297a3dbce2e6d7b0 Mon Sep 17 00:00:00 2001 From: asadchev Date: Thu, 19 Nov 2020 22:02:34 -0500 Subject: [PATCH 6/7] fixup --- tests/linear_algebra.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/linear_algebra.cpp b/tests/linear_algebra.cpp index 94d8643759..390c068aeb 100644 --- a/tests/linear_algebra.cpp +++ b/tests/linear_algebra.cpp @@ -22,7 +22,9 @@ namespace lapack = TA::lapack; #include "TiledArray/algebra/scalapack/all.h" namespace scalapack = TA::scalapack; #define TILEDARRAY_SCALAPACK_TEST(F,E) \ + GlobalFixture::world->gop.fence(); \ compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + GlobalFixture::world->gop.fence(); \ compare("TiledArray", lapack::F, TiledArray::F, E); #else #define TILEDARRAY_SCALAPACK_TEST(...) @@ -577,6 +579,7 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); + } BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { From 8355dc5f8cebee28d4f52d4a28c3c670e65cf4ed Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 20 Nov 2020 10:27:19 -0500 Subject: [PATCH 7/7] fixed cholesky_linv unit test --- tests/linear_algebra.cpp | 63 +++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/tests/linear_algebra.cpp b/tests/linear_algebra.cpp index 390c068aeb..07f9a58be2 100644 --- a/tests/linear_algebra.cpp +++ b/tests/linear_algebra.cpp @@ -21,17 +21,16 @@ namespace lapack = TA::lapack; #include "TiledArray/algebra/scalapack/all.h" namespace scalapack = TA::scalapack; -#define TILEDARRAY_SCALAPACK_TEST(F,E) \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ - GlobalFixture::world->gop.fence(); \ - compare("TiledArray", lapack::F, TiledArray::F, E); +#define TILEDARRAY_SCALAPACK_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray", lapack::F, TiledArray::F, E); #else #define TILEDARRAY_SCALAPACK_TEST(...) #endif struct ReferenceFixture { - size_t N; std::vector htoeplitz_vector; std::vector exact_evals; @@ -46,7 +45,8 @@ struct ReferenceFixture { #endif } - inline double make_ta_reference(TA::Tensor& t, TA::Range const& range) { + inline double make_ta_reference(TA::Tensor& t, + TA::Range const& range) { t = TA::Tensor(range, 0.0); auto lo = range.lobound_data(); auto up = range.upbound_data(); @@ -60,8 +60,7 @@ struct ReferenceFixture { }; ReferenceFixture(int64_t N = 1000) - : N(N), htoeplitz_vector(N), exact_evals(N) - { + : N(N), htoeplitz_vector(N), exact_evals(N) { // Generate an hermitian Circulant vector std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; @@ -84,40 +83,37 @@ struct ReferenceFixture { } std::sort(exact_evals.begin(), exact_evals.end()); - } - }; struct LinearAlgebraFixture : ReferenceFixture { - #if TILEDARRAY_HAS_SCALAPACK blacspp::Grid grid; scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? - LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) : - ReferenceFixture(N), - grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) - { + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) + : ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { for (size_t i = 0; i < N; ++i) { for (size_t j = 0; j < N; ++j) { if (ref_matrix.dist().i_own(i, j)) { auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); - ref_matrix.local_mat()(i_local, j_local) = matrix_element_generator(i, j); + ref_matrix.local_mat()(i_local, j_local) = + matrix_element_generator(i, j); } } } } - template - static void compare(const char *context, const A& lapack, const A& result, double e) { + template + static void compare(const char* context, const A& lapack, const A& result, + double e) { BOOST_TEST_CONTEXT(context); auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); BOOST_CHECK_SMALL(diff_with_lapack, e); } #endif - }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -142,7 +138,6 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; - BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) #if TILEDARRAY_HAS_SCALAPACK @@ -164,7 +159,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -192,7 +188,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -255,7 +252,8 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -318,8 +316,8 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = - scalapack::block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>( + ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -400,7 +398,7 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); }; -#endif // TILEDARRAY_HAS_SCALAPACK +#endif // TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); @@ -551,6 +549,7 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); + decltype(A) Acopy = A.clone(); auto Linv = lapack::cholesky_linv(A); @@ -576,10 +575,9 @@ BOOST_AUTO_TEST_CASE(cholesky_linv) { BOOST_CHECK_SMALL(norm, epsilon); - TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); GlobalFixture::world->gop.fence(); - } BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { @@ -676,7 +674,8 @@ BOOST_AUTO_TEST_CASE(cholesky_lsolve) { BOOST_CHECK(L.trange() == A.trange()); // first, test against LAPACK - auto [L_lapack, X_lapack] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); + auto [L_lapack, X_lapack] = + lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); decltype(L) L_error; L_error("i,j") = L("i,j") - L_lapack("i,j"); BOOST_CHECK_SMALL(L_error("i,j").norm().get(), @@ -727,7 +726,6 @@ BOOST_AUTO_TEST_CASE(lu_solve) { TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); - } BOOST_AUTO_TEST_CASE(lu_inv) { @@ -766,7 +764,6 @@ BOOST_AUTO_TEST_CASE(lu_inv) { TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); - } #if 1