diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c0eaa7fa3..a8d770603f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,9 +59,9 @@ endif(TILEDARRAY_PRERELEASE_ID) # Set install paths ============================================================ -set(TILEDARRAY_INSTALL_BINDIR "bin" +set(TILEDARRAY_INSTALL_BINDIR "bin" CACHE PATH "TiledArray binary install directory") -set(TILEDARRAY_INSTALL_INCLUDEDIR "include" +set(TILEDARRAY_INSTALL_INCLUDEDIR "include" CACHE PATH "TiledArray INCLUDE install directory") set(TILEDARRAY_INSTALL_LIBDIR "lib" CACHE PATH "TiledArray LIB install directory") @@ -162,16 +162,15 @@ else () endif() redefaultable_option(CMAKE_POSITION_INDEPENDENT_CODE "Default value for POSITION_INDEPENDENT_CODE of targets" ${default_CMAKE_POSITION_INDEPENDENT_CODE}) +set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") if(BUILD_SHARED_LIBS) - set(BLA_STATIC FALSE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH TRUE) else() - set(BLA_STATIC TRUE CACHE BOOL "Whether to use static linkage for BLAS, LAPACK, and related libraries") set(CMAKE_MACOSX_RPATH FALSE) endif() # miscellaneous cmake platform-neutral and platform-specific configuration ============================= -set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX +set(CMAKE_FIND_NO_INSTALL_PREFIX TRUE) # do not search in CMAKE_INSTALL_PREFIX set(CMAKE_SKIP_RPATH FALSE) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_SKIP_INSTALL_RPATH FALSE) @@ -394,19 +393,19 @@ export(EXPORT tiledarray configure_package_config_file(cmake/tiledarray-config.cmake.in "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" INSTALL_DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" - PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR + PATH_VARS CMAKE_INSTALL_PREFIX TILEDARRAY_INSTALL_BINDIR TILEDARRAY_INSTALL_INCLUDEDIR TILEDARRAY_INSTALL_LIBDIR TILEDARRAY_INSTALL_DOCDIR TILEDARRAY_INSTALL_CMAKEDIR) # Install config, version, and target files install(EXPORT tiledarray FILE "tiledarray-targets.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) install(FILES "${PROJECT_BINARY_DIR}/tiledarray-config.cmake" "${PROJECT_BINARY_DIR}/tiledarray-config-version.cmake" - DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" + DESTINATION "${TILEDARRAY_INSTALL_CMAKEDIR}" COMPONENT tiledarray) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a6ca49265..ddab08d0a1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,15 +60,17 @@ TiledArray/zero_tensor.h TiledArray/algebra/conjgrad.h TiledArray/algebra/diis.h TiledArray/algebra/utils.h -TiledArray/algebra/chol.h +TiledArray/algebra/cholesky.h TiledArray/algebra/heig.h TiledArray/algebra/lu.h TiledArray/algebra/svd.h TiledArray/algebra/types.h -TiledArray/algebra/lapack/chol.h +TiledArray/algebra/lapack/cholesky.h TiledArray/algebra/lapack/heig.h TiledArray/algebra/lapack/util.h -TiledArray/algebra/scalapack/chol.h +TiledArray/algebra/lapack/lu.h +TiledArray/algebra/lapack/svd.h +TiledArray/algebra/scalapack/cholesky.h TiledArray/algebra/scalapack/heig.h TiledArray/algebra/scalapack/lu.h TiledArray/algebra/scalapack/svd.h @@ -207,7 +209,7 @@ TiledArray/array_impl.cpp TiledArray/dist_array.cpp TiledArray/util/backtrace.cpp TiledArray/util/bug.cpp -TiledArray/algebra/lapack/lapack.cc +TiledArray/algebra/lapack/lapack.cpp ) # the list of libraries on which TiledArray depends on, will be cached later diff --git a/src/TiledArray/algebra/chol.h b/src/TiledArray/algebra/cholesky.h similarity index 86% rename from src/TiledArray/algebra/chol.h rename to src/TiledArray/algebra/cholesky.h index 222f80806d..b34bf38292 100644 --- a/src/TiledArray/algebra/chol.h +++ b/src/TiledArray/algebra/cholesky.h @@ -26,9 +26,9 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #endif -#include +#include namespace TiledArray { @@ -37,9 +37,8 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky(A, l_trange); - else #endif - return lapack::cholesky(A, l_trange); + return lapack::cholesky(A, l_trange); } template @@ -47,9 +46,8 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_linv(A, l_trange); - else #endif - return lapack::cholesky_linv(A, l_trange); + return lapack::cholesky_linv(A, l_trange); } template @@ -58,9 +56,8 @@ auto cholesky_solve(const Array& A, const Array& B, #if TILEDARRAY_HAS_SCALAPACK if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_solve(A, B, x_trange); - else #endif - return lapack::cholesky_solve(A, B, x_trange); + return lapack::cholesky_solve(A, B, x_trange); } template @@ -71,9 +68,8 @@ auto cholesky_lsolve(TransposeFlag transpose, const Array& A, const Array& B, if (A.world().size() > 1 && A.range().volume() > 10000000) return scalapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); - else #endif - return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); + return lapack::cholesky_lsolve(transpose, A, B, l_trange, x_trange); } } // namespace TiledArray diff --git a/src/TiledArray/algebra/heig.h b/src/TiledArray/algebra/heig.h index 1a3816da5f..68e39c9de9 100644 --- a/src/TiledArray/algebra/heig.h +++ b/src/TiledArray/algebra/heig.h @@ -27,16 +27,30 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -// eigen #endif +#include namespace TiledArray { +template +auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { #if TILEDARRAY_HAS_SCALAPACK -using scalapack::heig; + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, evec_trange); + } #endif + return lapack::heig(A, evec_trange); +} + +template +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::heig(A, B, evec_trange); + } #endif + return lapack::heig(A, B, evec_trange); +} } // namespace TiledArray diff --git a/src/TiledArray/algebra/lapack/chol.h b/src/TiledArray/algebra/lapack/cholesky.h similarity index 99% rename from src/TiledArray/algebra/lapack/chol.h rename to src/TiledArray/algebra/lapack/cholesky.h index 0d3a6fbeed..44376bae9a 100644 --- a/src/TiledArray/algebra/lapack/chol.h +++ b/src/TiledArray/algebra/lapack/cholesky.h @@ -24,9 +24,9 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_CHOL_H__INCLUDED -#include #include #include +#include #include namespace TiledArray { @@ -45,7 +45,7 @@ auto make_L_eig(const DistArray& A) { World& world = A.world(); auto A_eig = detail::to_eigen(A); if (world.rank() == 0) { - chol_eig(A_eig); + lapack::cholesky(A_eig); } world.gop.broadcast_serializable(A_eig, 0); return A_eig; diff --git a/src/TiledArray/algebra/lapack/heig.h b/src/TiledArray/algebra/lapack/heig.h index 4d779a176e..5fe4270c8f 100644 --- a/src/TiledArray/algebra/lapack/heig.h +++ b/src/TiledArray/algebra/lapack/heig.h @@ -24,15 +24,16 @@ #ifndef TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED #define TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED -#include #include + +#include +#include #include -namespace TiledArray { -namespace lapack { +namespace TiledArray::lapack { /** - * @brief Solve the standard eigenvalue problem with ScaLAPACK + * @brief Solve the standard eigenvalue problem with LAPACK * * A(i,k) X(k,j) = X(i,j) E(j) * @@ -51,80 +52,24 @@ namespace lapack { */ template auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { - using scalar_type = typename Array::scalar_type; - using numeric_type = typename Array::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - + using numeric_type = typename lapack::array_traits::numeric_type; World& world = A.world(); auto A_eig = detail::to_eigen(A); - std::vector evals; -// if (world.rank() == 0) { -// char jobz = 'V'; -// char uplo = 'L'; -// integer n = A_eig.rows(); -// numeric_type* a = A_eig.data(); -// integer lda = n; -// integer info = 0; -// evals.resize(n); -// integer lwork = -1; -// std::vector work(1); -// // run once to query, then to compute -// while (lwork != static_cast(work.size())) { -// if (lwork > 0) { -// work.resize(lwork); -// } -// if constexpr (is_real) { -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN(syev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, &info, -// sizeof(char), sizeof(char)); -// #endif -// } else { -// std::vector rwork; -// if (lwork == static_cast(work.size())) rwork.resize(3 * n - 2); -// #if defined(MADNESS_LINALG_USE_LAPACKE) -// MADNESS_DISPATCH_LAPACK_FN(heev, &jobz, &uplo, &n, a, &lda, -// evals.data(), work.data(), &lwork, -// &rwork.data(), &info); -// #else -// MADNESS_DISPATCH_LAPACK_FN( -// heev, &jobz, &uplo, &n, a, &lda, evals.data(), work.data(), &lwork, -// &rwork.data(), &info, sizeof(char), sizeof(char)); -// #endif -// } -// if (lwork == -1) { -// if constexpr (is_real) { -// lwork = static_cast(work[0]); -// } else { -// lwork = static_cast(work[0].real()); -// } -// TA_ASSERT(lwork > 1); -// } -// }; - -// if (info != 0) { -// if (is_real) -// TA_EXCEPTION("LAPACK::syev failed"); -// else -// TA_EXCEPTION("LAPACK::heev failed"); -// } -// } - + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, evals); + } world.gop.broadcast_serializable(A_eig, 0); world.gop.broadcast_serializable(evals, 0); if (evec_trange.rank() == 0) evec_trange = A.trange(); - return std::tuple(evals, - eigen_to_array(A.world(), evec_trange, A_eig)); + return std::tuple( + evals, + eigen_to_array(world, evec_trange, A_eig) + ); } /** - * @brief Solve the generalized eigenvalue problem with ScaLAPACK + * @brief Solve the generalized eigenvalue problem with LAPACK * * A(i,k) X(k,j) = B(i,k) X(k,j) E(j) * @@ -142,26 +87,30 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange()) { * @param[in] B Positive-definite matrix * @param[in] evec_trange TiledRange for resulting eigenvectors. If left empty, * will default to array.trange() - * @param[in] NB ScaLAPACK block size. Defaults to 128 * * @returns A tuple containing the eigenvalues and eigenvectors of input array * as std::vector and in TA format, respectively. */ template -auto heig(const ArrayA& A, const ArrayB& B, - TiledRange evec_trange = TiledRange()) { - using scalar_type = typename ArrayA::scalar_type; - using numeric_type = typename ArrayA::numeric_type; - constexpr const bool is_real = std::is_same_v; - static_assert(std::is_same_v, - "TA::lapack::{cholesky*} are only usable with a DistArray of " - "scalar types"); - - abort(); - return std::tuple(std::vector{}, EVecType{}); +auto heig(const ArrayA& A, const ArrayB& B, TiledRange evec_trange = TiledRange()) { + using numeric_type = typename lapack::array_traits::numeric_type; + (void)lapack::array_traits{}; + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + std::vector evals; + if (world.rank() == 0) { + lapack::heig(A_eig, B_eig, evals); + } + world.gop.broadcast_serializable(A_eig, 0); + world.gop.broadcast_serializable(evals, 0); + if (evec_trange.rank() == 0) evec_trange = A.trange(); + return std::tuple( + evals, + eigen_to_array(A.world(), evec_trange, A_eig) + ); } -} // namespace lapack -} // namespace TiledArray +} // namespace TiledArray::lapack -#endif // TILEDARRAY_ALGEBRA_SCALAPACK_HEIG_H__INCLUDED +#endif // TILEDARRAY_ALGEBRA_LAPACK_HEIG_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/lapack.cc b/src/TiledArray/algebra/lapack/lapack.cc deleted file mode 100644 index a2d6e91626..0000000000 --- a/src/TiledArray/algebra/lapack/lapack.cc +++ /dev/null @@ -1,206 +0,0 @@ -/* - * This file is a part of TiledArray. - * Copyright (C) 2020 Virginia Tech - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * Eduard Valeyev - * - * chol.h - * Created: 16 October, 2020 - * - */ - -#include -#include -#include -#include -#include - -#define TA_LAPACK_CALL(name, args...) \ - typedef T numeric_type; \ - if constexpr (std::is_same_v) \ - d##name##_(args); \ - else if constexpr (std::is_same_v) \ - s##name##_(args); \ - else if constexpr (std::is_same_v>) \ - z##name##_(args); \ - else if constexpr (std::is_same_v>) \ - c##name##_(args); \ - else \ - std::abort(); - -namespace TiledArray::lapack { - -template -void cholesky(Matrix &A) { - char uplo = 'L'; - integer n = A.rows(); - auto* a = A.data(); - integer lda = n; - integer info = 0; -#if defined(MADNESS_LINALG_USE_LAPACKE) - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info); -#else - TA_LAPACK_CALL(potrf, &uplo, &n, a, &lda, &info, sizeof(char)); -#endif - if (info != 0) TA_EXCEPTION("LAPACK::potrf failed"); -} - -template -void cholesky_linv(Matrix &A) { - char uplo = 'L'; - char diag = 'N'; - integer n = A.rows(); - auto* l = A.data(); - integer lda = n; - integer info = 0; - TA_LAPACK_CALL(trtri, &uplo, &diag, &n, l, &lda, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtri failed"); -} - -template -void cholesky_solve(Matrix &A, Matrix &X) { - char uplo = 'L'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - //TA_LAPACK_CALL(posv, &uplo, &n, &nrhs, a, &lda, b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::posv failed"); -} - -template -void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X) { - char uplo = 'L'; - char trans = transpose == TransposeFlag::Transpose - ? 'T' - : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); - char diag = 'N'; - integer n = A.rows(); - integer nrhs = X.cols(); - auto* a = A.data(); - auto* b = X.data(); - integer lda = n; - integer ldb = n; - integer info = 0; - //TA_LAPACK_CALL(trtrs, &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); - if (info != 0) TA_EXCEPTION("LAPACK::trtrs failed"); -} - -template -void hereig(Matrix &A, Vector &W) { - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); - lwork = integer(lwork_dummy); - Vector work(lwork); - TA_LAPACK_CALL(syev, &jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); - if (info != 0) TA_EXCEPTION("lapack::hereig failed"); -} - -template -void hereig_gen(Matrix &A, Matrix &B, Vector &W) { - integer itype = 1; - char jobz = 'V'; - char uplo = 'L'; - integer n = A.rows(); - T* a = A.data(); - integer lda = A.rows(); - T* b = B.data(); - integer ldb = B.rows(); - T* w = W.data(); - integer lwork = -1; - integer info; - T lwork_dummy; - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info, sizeof(char), sizeof(char) ); - lwork = integer(lwork_dummy); - Vector work(lwork); - TA_LAPACK_CALL(sygv, &itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info, sizeof(char), sizeof(char) ); - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - -template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT) { - integer m = A.rows(); - integer n = A.cols(); - T* a = A.data(); - integer lda = A.rows(); - - S.resize(std::max(m,n)); - T* s = S.data(); - - char jobu = 'N'; - T* u = nullptr; - integer ldu = 0; - if (U) { - jobu = 'A'; - U->resize(m,n); - u = U->data(); - ldu = U->rows(); - } - - char jobvt = 'N'; - T* vt = nullptr; - integer ldvt = 0; - if (VT) { - jobvt = 'A'; - VT->resize(n,m); - vt = VT->data(); - ldvt = VT->rows(); - } - - integer lwork = -1; - integer info; - T lwork_dummy; - - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); - lwork = integer(lwork_dummy); - Vector work(lwork); - TA_LAPACK_CALL( - gesvd, - &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info, - sizeof(char), sizeof(char) - ); - if (info != 0) TA_EXCEPTION("lapack::hereig_gen failed"); -} - - -#define TA_LAPACK_EXPLICIT(MATRIX,VECTOR) \ - template void cholesky(MATRIX&); \ - template void cholesky_linv(MATRIX&); \ - template void cholesky_solve(MATRIX&,MATRIX&); \ - template void cholesky_lsolve(TransposeFlag,MATRIX&,MATRIX&); \ - template void hereig(MATRIX&,VECTOR&); \ - template void hereig_gen(MATRIX&,MATRIX&,VECTOR&); \ - template void svd(MATRIX&,VECTOR&,MATRIX*,MATRIX*); - - -TA_LAPACK_EXPLICIT(lapack::Matrix, lapack::Vector); - -} diff --git a/src/TiledArray/algebra/lapack/lapack.cpp b/src/TiledArray/algebra/lapack/lapack.cpp new file mode 100644 index 0000000000..640ef695f1 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lapack.cpp @@ -0,0 +1,282 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * + * cholesky.h + * Created: 16 October, 2020 + * + */ + +#include +#include +#include +#include +#include + +#define TA_LAPACK_CALL(name, args...) \ + typedef T numeric_type; \ + if constexpr (std::is_same_v) \ + d##name##_(args); \ + else if constexpr (std::is_same_v) \ + s##name##_(args); \ + else if constexpr (std::is_same_v>) \ + z##name##_(args); \ + else if constexpr (std::is_same_v>) \ + c##name##_(args); \ + else \ + std::abort(); + +#define TA_LAPACK_GESV(...) TA_LAPACK_CALL(gesv, __VA_ARGS__) +#define TA_LAPACK_GETRF(...) TA_LAPACK_CALL(getrf, __VA_ARGS__) +#define TA_LAPACK_GETRI(...) TA_LAPACK_CALL(getri, __VA_ARGS__) + +#ifdef MADNESS_LINALG_USE_LAPACKE + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__) + +#else + +#ifdef FORTRAN_LINKAGE_LCU +#define dtrtri dtrtri_ +#define dtrtrs dtrtrs_ +#define dposv dposv_ +#endif + +extern "C" { // these arent in madness/clapack_fortran.h +void dtrtri(const char* uplo, const char* diag, + const integer* n, + const real8* a, const integer* lda, + integer *info, + char_len, char_len); +void dtrtrs(const char* uplo, const char* trans, const char* diag, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len, char_len, char_len); +void dposv(const char* uplo, + const integer* n, const integer *nrhs, + const real8* a, const integer* lda, + const real8* b, const integer* ldb, + integer *info, + char_len); +} + +#define TA_LAPACK_POTRF(...) TA_LAPACK_CALL(potrf, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_POSV(...) TA_LAPACK_CALL(posv, __VA_ARGS__, sizeof(char)) +#define TA_LAPACK_GESVD(...) TA_LAPACK_CALL(gesvd, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRI(...) TA_LAPACK_CALL(trtri, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_TRTRS(...) TA_LAPACK_CALL(trtrs, __VA_ARGS__, sizeof(char), sizeof(char), sizeof(char)) +#define TA_LAPACK_SYEV(...) TA_LAPACK_CALL(syev, __VA_ARGS__, sizeof(char), sizeof(char)) +#define TA_LAPACK_SYGV(...) TA_LAPACK_CALL(sygv, __VA_ARGS__, sizeof(char), sizeof(char)) + +#endif // MADNESS_LINALG_USE_LAPACKE + +namespace TiledArray::lapack { + +template +void cholesky(Matrix& A) { + char uplo = 'L'; + integer n = A.rows(); + auto* a = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_POTRF(&uplo, &n, a, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky failed"); +} + +template +void cholesky_linv(Matrix& A) { + char uplo = 'L'; + char diag = 'N'; + integer n = A.rows(); + auto* l = A.data(); + integer lda = n; + integer info = 0; + TA_LAPACK_TRTRI(&uplo, &diag, &n, l, &lda, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_linv failed"); +} + +template +void cholesky_solve(Matrix& A, Matrix& X) { + char uplo = 'L'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_POSV(&uplo, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_solve failed"); +} + +template +void cholesky_lsolve(TransposeFlag transpose, Matrix& A, Matrix& X) { + char uplo = 'L'; + char trans = transpose == TransposeFlag::Transpose + ? 'T' + : (transpose == TransposeFlag::NoTranspose ? 'N' : 'C'); + char diag = 'N'; + integer n = A.rows(); + integer nrhs = X.cols(); + auto* a = A.data(); + auto* b = X.data(); + integer lda = n; + integer ldb = n; + integer info = 0; + TA_LAPACK_TRTRS(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::cholesky_lsolve failed"); +} + +template +void heig(Matrix& A, std::vector& W) { + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYEV(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void heig(Matrix& A, Matrix& B, std::vector& W) { + integer itype = 1; + char jobz = 'V'; + char uplo = 'L'; + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + W.resize(n); + T* w = W.data(); + integer lwork = -1; + integer info; + T lwork_dummy; + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_SYGV(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::heig failed"); +} + +template +void svd(Matrix& A, std::vector& S, Matrix* U, Matrix* VT) { + integer m = A.rows(); + integer n = A.cols(); + T* a = A.data(); + integer lda = A.rows(); + + S.resize(std::min(m, n)); + T* s = S.data(); + + char jobu = 'N'; + T* u = nullptr; + integer ldu = m; + if (U) { + jobu = 'A'; + U->resize(m, n); + u = U->data(); + ldu = U->rows(); + } + + char jobvt = 'N'; + T* vt = nullptr; + integer ldvt = n; + if (VT) { + jobvt = 'A'; + VT->resize(n, m); + vt = VT->data(); + ldvt = VT->rows(); + } + + integer lwork = -1; + integer info; + T lwork_dummy; + + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &lwork_dummy, &lwork, &info); + lwork = integer(lwork_dummy); + std::vector work(lwork); + TA_LAPACK_GESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::svd failed"); +} + +template +void lu_solve(Matrix &A, Matrix &B) { + integer n = A.rows(); + integer nrhs = B.cols(); + T* a = A.data(); + integer lda = A.rows(); + T* b = B.data(); + integer ldb = B.rows(); + std::vector ipiv(n); + integer info; + TA_LAPACK_GESV(&n, &nrhs, a, &lda, ipiv.data(), b, &ldb, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_solve failed"); +} + +template +void lu_inv(Matrix &A) { + integer n = A.rows(); + T* a = A.data(); + integer lda = A.rows(); + integer lwork = -1; + std::vector work(1); + std::vector ipiv(n); + integer info; + TA_LAPACK_GETRF(&n, &n, a, &lda, ipiv.data(), &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + lwork = (integer)work[0]; + work.resize(lwork); + TA_LAPACK_GETRI(&n, a, &lda, ipiv.data(), work.data(), &lwork, &info); + if (info != 0) TA_EXCEPTION("lapack::lu_inv failed"); +} + + +#define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ + template void cholesky(MATRIX&); \ + template void cholesky_linv(MATRIX&); \ + template void cholesky_solve(MATRIX&, MATRIX&); \ + template void cholesky_lsolve(TransposeFlag, MATRIX&, MATRIX&); \ + template void heig(MATRIX&, VECTOR&); \ + template void heig(MATRIX&, MATRIX&, VECTOR&); \ + template void svd(MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ + template void lu_solve(MATRIX&, MATRIX&); \ + template void lu_inv(MATRIX&); + +TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); +//TA_LAPACK_EXPLICIT(lapack::Matrix, std::vector); + +} // namespace TiledArray::lapack diff --git a/src/TiledArray/algebra/lapack/lapack.h b/src/TiledArray/algebra/lapack/lapack.h index 1ccf68719f..78c767bfc5 100644 --- a/src/TiledArray/algebra/lapack/lapack.h +++ b/src/TiledArray/algebra/lapack/lapack.h @@ -30,8 +30,16 @@ namespace TiledArray::lapack { -template -using Vector = Eigen::Matrix; +template +struct array_traits { + using scalar_type = typename A::scalar_type; + using numeric_type = typename A::numeric_type; + static const bool complex = !std::is_same_v; + static_assert( + std::is_same_v, + "TA::lapack is only usable with a DistArray of scalar types" + ); +}; template using Matrix = Eigen::Matrix; @@ -49,10 +57,19 @@ template void cholesky_lsolve(TransposeFlag transpose, Matrix &A, Matrix &X); template -void hereig(Matrix &A, Vector &W); +void heig(Matrix &A, std::vector &W); + +template +void heig(Matrix &A, Matrix &B, std::vector &W); + +template +void svd(Matrix &A, std::vector &S, Matrix *U, Matrix *VT); + +template +void lu_solve(Matrix &A, Matrix &B); template -void svd(Matrix &A, Vector &S, Matrix *U, Matrix *VT); +void lu_inv(Matrix &A); } diff --git a/src/TiledArray/algebra/lapack/lu.h b/src/TiledArray/algebra/lapack/lu.h new file mode 100644 index 0000000000..389af28942 --- /dev/null +++ b/src/TiledArray/algebra/lapack/lu.h @@ -0,0 +1,70 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * lu.h + * Created: 19 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Solve a linear system via LU factorization + */ +template +auto lu_solve(const ArrayA& A, const ArrayB& B, TiledRange x_trange = TiledRange()) { + (void)lapack::array_traits{}; + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + auto B_eig = detail::to_eigen(B); + if (world.rank() == 0) { + lapack::lu_solve(A_eig, B_eig); + } + world.gop.broadcast_serializable(B_eig, 0); + if (x_trange.rank() == 0) x_trange = B.trange(); + return eigen_to_array(world, x_trange, B_eig); +} + +/** + * @brief Invert a matrix via LU + */ +template +auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange()) { + (void)lapack::array_traits{}; + auto& world = A.world(); + auto A_eig = detail::to_eigen(A); + if (world.rank() == 0) { + lapack::lu_inv(A_eig); + } + world.gop.broadcast_serializable(A_eig, 0); + if (ainv_trange.rank() == 0) ainv_trange = A.trange(); + return eigen_to_array(A.world(), ainv_trange, A_eig); +} + +} // namespace TiledArray::lapack + +#endif // TILEDARRAY_ALGEBRA_LAPACK_LU_H__INCLUDED diff --git a/src/TiledArray/algebra/lapack/svd.h b/src/TiledArray/algebra/lapack/svd.h new file mode 100644 index 0000000000..8ea0afd3da --- /dev/null +++ b/src/TiledArray/algebra/lapack/svd.h @@ -0,0 +1,103 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2020 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * David Williams-Young + * Computational Research Division, Lawrence Berkeley National Laboratory + * + * svd.h + * Created: 12 June, 2020 + * + */ +#ifndef TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED +#define TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED + +#include + +#include + +namespace TiledArray::lapack { + +/** + * @brief Compute the singular value decomposition (SVD) via ScaLAPACK + * + * A(i,j) = S(k) U(i,k) conj(V(j,k)) + * + * Example Usage: + * + * auto S = svd (A, ...) + * auto [S, U] = svd (A, ...) + * auto [S, VT] = svd(A, ...) + * auto [S, U, VT] = svd (A, ...) + * + * @tparam Array Input array type, must be convertible to BlockCyclicMatrix + * + * @param[in] A Input array to be decomposed. Must be rank-2 + * @param[in] u_trange TiledRange for resulting left singular vectors. + * @param[in] vt_trange TiledRange for resulting right singular vectors + * (transposed). + * + * @returns A tuple containing the eigenvalues and eigenvectors of input array + * as std::vector and in TA format, respectively. + */ +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { + + using T = typename Array::numeric_type; + + World& world = A.world(); + auto A_eig = detail::to_eigen(A); + + constexpr bool svd_all_vectors = std::is_same_v; + constexpr bool need_u = std::is_same_v or svd_all_vectors; + constexpr bool need_vt = std::is_same_v or svd_all_vectors; + + std::vector S; + std::unique_ptr< Matrix > U, VT; + + if constexpr (need_u) U = std::make_unique< Matrix >(); + if constexpr (need_vt) VT = std::make_unique< Matrix >(); + + if (world.rank() == 0) { + lapack::svd(A_eig, S, U.get(), VT.get()); + } + + world.gop.broadcast_serializable(S, 0); + if (U) world.gop.broadcast_serializable(*U, 0); + if (VT) world.gop.broadcast_serializable(*VT, 0); + + auto make_array = [&world](auto && ... args) { + return eigen_to_array(world, args...); + }; + + if constexpr (need_u && need_vt) { + return std::tuple(S, make_array(u_trange, *U), make_array(vt_trange, *VT)); + } + if constexpr (need_u && !need_vt) { + return std::tuple(S, make_array(u_trange, *U)); + } + if constexpr (!need_u && need_vt) { + return std::tuple(S, make_array(vt_trange, *VT)); + } + + if constexpr (!need_u && !need_vt) return S; + +} + +} // namespace scalapack::TiledArray + +#endif // TILEDARRAY_ALGEBRA_LAPACK_SVD_H__INCLUDED diff --git a/src/TiledArray/algebra/lu.h b/src/TiledArray/algebra/lu.h index fa1bfca49e..330f49c8f7 100644 --- a/src/TiledArray/algebra/lu.h +++ b/src/TiledArray/algebra/lu.h @@ -27,9 +27,8 @@ #include #if TILEDARRAY_HAS_SCALAPACK #include -#else -#// include eigen #endif +#include namespace TiledArray { diff --git a/src/TiledArray/algebra/scalapack/all.h b/src/TiledArray/algebra/scalapack/all.h index 2599d0280e..fbba6826d0 100644 --- a/src/TiledArray/algebra/scalapack/all.h +++ b/src/TiledArray/algebra/scalapack/all.h @@ -28,7 +28,7 @@ #include #if TILEDARRAY_HAS_SCALAPACK -#include +#include #include #include #include diff --git a/src/TiledArray/algebra/scalapack/chol.h b/src/TiledArray/algebra/scalapack/cholesky.h similarity index 100% rename from src/TiledArray/algebra/scalapack/chol.h rename to src/TiledArray/algebra/scalapack/cholesky.h diff --git a/src/TiledArray/algebra/svd.h b/src/TiledArray/algebra/svd.h index b31c7932da..3c87fbc087 100644 --- a/src/TiledArray/algebra/svd.h +++ b/src/TiledArray/algebra/svd.h @@ -27,16 +27,21 @@ #include #ifdef TILEDARRAY_HAS_SCALAPACK #include -#else -// include eigen #endif // TILEDARRAY_HAS_SCALAPACK +#include namespace TiledArray { -#ifdef TILEDARRAY_HAS_SCALAPACK -using scalapack::svd; -#else +template > +auto svd(const Array& A, TiledRange u_trange = TiledRange(), TiledRange vt_trange = TiledRange()) { +#if TILEDARRAY_HAS_SCALAPACK + if (A.world().size() > 1 && A.range().volume() > 10000000) { + return scalapack::svd(A, u_trange, vt_trange); + } #endif + return lapack::svd(A, u_trange, vt_trange); +} } // namespace TiledArray diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9004369a5c..75889908de 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -111,16 +111,16 @@ set(ta_test_src_files ta_test.cpp # t_tot_tot_contract_.cpp # tot_tot_tot_contract_.cpp # einsum.cpp - lapack.cpp + linear_algebra.cpp ) if(CUDA_FOUND) list(APPEND ta_test_src_files cutt.cpp expressions_cuda_um.cpp tensor_um.cpp) endif() -if (TARGET TiledArray_SCALAPACK) - list(APPEND ta_test_src_files scalapack.cpp) -endif(TARGET TiledArray_SCALAPACK) +# if (TARGET TiledArray_SCALAPACK) +# list(APPEND ta_test_src_files scalapack.cpp) +# endif(TARGET TiledArray_SCALAPACK) # if tiledarray library was compiled without exceptions, use TA header-only (see below) if (NOT TA_DEFAULT_ERROR EQUAL 1 AND NOT CUDA_FOUND AND FALSE) diff --git a/tests/lapack.cpp b/tests/lapack.cpp deleted file mode 100644 index 287acab547..0000000000 --- a/tests/lapack.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include "TiledArray/config.h" -#include "range_fixture.h" -#include "unit_test_config.h" - -#include "TiledArray/algebra/lapack/chol.h" -#include "TiledArray/algebra/lapack/heig.h" - -using namespace TiledArray::lapack; - -struct LAPACKFixture { - int64_t N; - std::vector htoeplitz_vector; - std::vector exact_evals; - - inline double matrix_element_generator(int64_t i, int64_t j) { - // Generates a Circulant matrix: good condition number - return htoeplitz_vector[std::abs(i - j)]; - } - - inline double make_ta_reference(TA::Tensor& t, - TA::Range const& range) { - t = TA::Tensor(range, 0.0); - auto lo = range.lobound_data(); - auto up = range.upbound_data(); - for (auto m = lo[0]; m < up[0]; ++m) { - for (auto n = lo[1]; n < up[1]; ++n) { - t(m, n) = matrix_element_generator(m, n); - } - } - - return t.norm(); - }; - - LAPACKFixture(int64_t N) : N(N), htoeplitz_vector(N), exact_evals(N) { - // Generate an hermitian Circulant vector - std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); - htoeplitz_vector[0] = 100; - std::default_random_engine gen(0); - std::uniform_real_distribution<> dist(0., 1.); - for (int64_t i = 1; i <= (N / 2); ++i) { - double val = dist(gen); - htoeplitz_vector[i] = val; - htoeplitz_vector[N - i] = val; - } - - // Compute exact eigenvalues - const double ff = 2. * M_PI / N; - for (int64_t j = 0; j < N; ++j) { - double val = htoeplitz_vector[0]; - for (int64_t k = 1; k < N; ++k) - val += htoeplitz_vector[N - k] * std::cos(ff * j * k); - exact_evals[j] = val; - } - - std::sort(exact_evals.begin(), exact_evals.end()); - } - - LAPACKFixture() : LAPACKFixture(1000) {} -}; - -BOOST_FIXTURE_TEST_SUITE(lapack_suite, LAPACKFixture) - -BOOST_AUTO_TEST_CASE(chol) { - auto range = TA::Range{N, N}; - - TA::Tensor A; - this->make_ta_reference(A, range); - - auto L = cholesky(A); - - decltype(A) A_minus_LLt; - A_minus_LLt = A.clone(); - A_minus_LLt.gemm(L, L, -1, - math::GemmHelper{madness::cblas::NoTrans, - madness::cblas::ConjTrans, 2, 2, 2}); - - BOOST_CHECK_SMALL(A_minus_LLt.norm(), - N * N * std::numeric_limits::epsilon()); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/scalapack.cpp b/tests/linear_algebra.cpp similarity index 82% rename from tests/scalapack.cpp rename to tests/linear_algebra.cpp index f728938a25..07f9a58be2 100644 --- a/tests/scalapack.cpp +++ b/tests/linear_algebra.cpp @@ -1,19 +1,37 @@ #include #include #include "TiledArray/config.h" -#include "range_fixture.h" +//#include "range_fixture.h" #include "unit_test_config.h" -#include "TiledArray/algebra/lapack/chol.h" +#include "TiledArray/algebra/lapack/cholesky.h" #include "TiledArray/algebra/lapack/heig.h" -#include "TiledArray/algebra/scalapack/all.h" +#include "TiledArray/algebra/lapack/lu.h" +#include "TiledArray/algebra/lapack/svd.h" -using namespace TiledArray::scalapack; +#include "TiledArray/algebra/cholesky.h" +#include "TiledArray/algebra/heig.h" +#include "TiledArray/algebra/lu.h" +#include "TiledArray/algebra/svd.h" -struct ScaLAPACKFixture { - blacspp::Grid grid; - BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? +namespace TA = TiledArray; +namespace lapack = TA::lapack; + +#if TILEDARRAY_HAS_SCALAPACK +#include "TiledArray/algebra/scalapack/all.h" +namespace scalapack = TA::scalapack; +#define TILEDARRAY_SCALAPACK_TEST(F, E) \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray::scalapack", lapack::F, scalapack::F, E); \ + GlobalFixture::world->gop.fence(); \ + compare("TiledArray", lapack::F, TiledArray::F, E); +#else +#define TILEDARRAY_SCALAPACK_TEST(...) +#endif + +struct ReferenceFixture { + size_t N; std::vector htoeplitz_vector; std::vector exact_evals; @@ -41,21 +59,8 @@ struct ScaLAPACKFixture { return t.norm(); }; - inline void construct_scalapack(BlockCyclicMatrix& A) { - auto [M, N] = A.dims(); - for (size_t i = 0; i < M; ++i) - for (size_t j = 0; j < N; ++j) - if (A.dist().i_own(i, j)) { - auto [i_local, j_local] = A.dist().local_indx(i, j); - A.local_mat()(i_local, j_local) = matrix_element_generator(i, j); - } - } - - ScaLAPACKFixture(int64_t N, int64_t NB) - : grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? - ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB), - htoeplitz_vector(N), - exact_evals(N) { + ReferenceFixture(int64_t N = 1000) + : N(N), htoeplitz_vector(N), exact_evals(N) { // Generate an hermitian Circulant vector std::fill(htoeplitz_vector.begin(), htoeplitz_vector.begin(), 0); htoeplitz_vector[0] = 100; @@ -78,12 +83,37 @@ struct ScaLAPACKFixture { } std::sort(exact_evals.begin(), exact_evals.end()); - - // Fill reference matrix - construct_scalapack(ref_matrix); } +}; + +struct LinearAlgebraFixture : ReferenceFixture { +#if TILEDARRAY_HAS_SCALAPACK - ScaLAPACKFixture() : ScaLAPACKFixture(1000, 128) {} + blacspp::Grid grid; + scalapack::BlockCyclicMatrix ref_matrix; // XXX: Just double is fine? + + LinearAlgebraFixture(int64_t N = 1000, int64_t NB = 128) + : ReferenceFixture(N), + grid(blacspp::Grid::square_grid(MPI_COMM_WORLD)), // XXX: Is this safe? + ref_matrix(*GlobalFixture::world, grid, N, N, NB, NB) { + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + if (ref_matrix.dist().i_own(i, j)) { + auto [i_local, j_local] = ref_matrix.dist().local_indx(i, j); + ref_matrix.local_mat()(i_local, j_local) = + matrix_element_generator(i, j); + } + } + } + } + template + static void compare(const char* context, const A& lapack, const A& result, + double e) { + BOOST_TEST_CONTEXT(context); + auto diff_with_lapack = (lapack("i,j") - result("i,j")).norm().get(); + BOOST_CHECK_SMALL(diff_with_lapack, e); + } +#endif }; TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { @@ -108,7 +138,9 @@ TA::TiledRange gen_trange(size_t N, const std::vector& TA_NBs) { return TA::TiledRange(ranges.begin(), ranges.end()); }; -BOOST_FIXTURE_TEST_SUITE(scalapack_suite, ScaLAPACKFixture) +BOOST_FIXTURE_TEST_SUITE(linear_algebra_suite, LinearAlgebraFixture) + +#if TILEDARRAY_HAS_SCALAPACK BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { GlobalFixture::world->gop.fence(); @@ -127,7 +159,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -155,7 +188,8 @@ BOOST_AUTO_TEST_CASE(bc_to_uniform_dense_tiled_array_all_small_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -183,7 +217,7 @@ BOOST_AUTO_TEST_CASE(uniform_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -218,7 +252,8 @@ BOOST_AUTO_TEST_CASE(bc_to_random_dense_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = + scalapack::block_cyclic_to_array>(ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -246,7 +281,7 @@ BOOST_AUTO_TEST_CASE(random_dense_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -281,8 +316,8 @@ BOOST_AUTO_TEST_CASE(bc_to_sparse_tiled_array_test) { }); GlobalFixture::world->gop.fence(); - auto test_ta = - block_cyclic_to_array>(ref_matrix, trange); + auto test_ta = scalapack::block_cyclic_to_array>( + ref_matrix, trange); GlobalFixture::world->gop.fence(); auto norm_diff = @@ -310,7 +345,7 @@ BOOST_AUTO_TEST_CASE(sparse_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -345,7 +380,7 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { }); GlobalFixture::world->gop.fence(); - auto test_matrix = array_to_block_cyclic(ref_ta, grid, NB, NB); + auto test_matrix = scalapack::array_to_block_cyclic(ref_ta, grid, NB, NB); GlobalFixture::world->gop.fence(); double local_norm_diff = @@ -363,10 +398,10 @@ BOOST_AUTO_TEST_CASE(const_tiled_array_to_bc_test) { GlobalFixture::world->gop.fence(); }; -BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { +#endif // TILEDARRAY_HAS_SCALAPACK + +BOOST_AUTO_TEST_CASE(heig_same_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -376,7 +411,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { return this->make_ta_reference(t, range); }); - auto [evals, evecs] = heig(ref_ta); + auto [evals, evecs] = lapack::heig(ref_ta); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta); // auto evals = heig( ref_ta ); @@ -399,10 +434,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_same_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { +BOOST_AUTO_TEST_CASE(heig_diff_tiling) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); auto ref_ta = TA::make_array>( @@ -412,7 +445,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { }); auto new_trange = gen_trange(N, {64ul}); - auto [evals, evecs] = heig(ref_ta, new_trange, 128); + auto [evals, evecs] = lapack::heig(ref_ta, new_trange); auto [evals_lapack, evecs_lapack] = lapack::heig(ref_ta, new_trange); BOOST_CHECK(evecs.trange() == new_trange); @@ -434,10 +467,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_diff_tiling) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_heig_generalized) { +BOOST_AUTO_TEST_CASE(heig_generalized) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -461,7 +492,7 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { }); GlobalFixture::world->gop.fence(); - auto [evals, evecs] = heig(ref_ta, dense_iden); + auto [evals, evecs] = lapack::heig(ref_ta, dense_iden); // auto evals = heig( ref_ta ); BOOST_CHECK(evecs.trange() == ref_ta.trange()); @@ -476,10 +507,8 @@ BOOST_AUTO_TEST_CASE(sca_heig_generalized) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol) { +BOOST_AUTO_TEST_CASE(cholesky) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -489,7 +518,7 @@ BOOST_AUTO_TEST_CASE(sca_chol) { return this->make_ta_reference(t, range); }); - auto L = cholesky(A); + auto L = lapack::cholesky(A); BOOST_CHECK(L.trange() == A.trange()); @@ -510,10 +539,8 @@ BOOST_AUTO_TEST_CASE(sca_chol) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv) { +BOOST_AUTO_TEST_CASE(cholesky_linv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -522,9 +549,9 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { [this](TA::Tensor& t, TA::Range const& range) -> double { return this->make_ta_reference(t, range); }); + decltype(A) Acopy = A.clone(); - auto Linv = cholesky_linv(A); - auto Linv_lapack = lapack::cholesky_linv(A); + auto Linv = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); @@ -543,22 +570,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = A("i,j").norm().get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(Acopy), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { +BOOST_AUTO_TEST_CASE(cholesky_linv_retl) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -568,8 +591,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { return this->make_ta_reference(t, range); }); - auto [L, Linv] = cholesky_linv(A); - auto [L_lapack, Linv_lapack] = lapack::cholesky_linv(A); + auto [L, Linv] = lapack::cholesky_linv(A); BOOST_CHECK(Linv.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); @@ -588,26 +610,18 @@ BOOST_AUTO_TEST_CASE(sca_chol_linv_retl) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = tmp("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); - // test against LAPACK - decltype(L) L_error; - L_error("i,j") = L("i,j") - L_lapack("i,j"); - BOOST_CHECK_SMALL(L_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); - decltype(Linv) Linv_error; - Linv_error("i,j") = Linv("i,j") - Linv_lapack("i,j"); - BOOST_CHECK_SMALL(Linv_error("i,j").norm().get(), - N * N * std::numeric_limits::epsilon()); + BOOST_CHECK_SMALL(norm, epsilon); + + TILEDARRAY_SCALAPACK_TEST(cholesky_linv(A), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_solve) { +BOOST_AUTO_TEST_CASE(cholesky_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -617,7 +631,7 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { return this->make_ta_reference(t, range); }); - auto iden = cholesky_solve(A, A); + auto iden = lapack::cholesky_solve(A, A); BOOST_CHECK(iden.trange() == A.trange()); auto iden_lapack = lapack::cholesky_solve(A, A); @@ -643,10 +657,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_solve) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { +BOOST_AUTO_TEST_CASE(cholesky_lsolve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -657,13 +669,13 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { }); // Should produce X = L**H - auto [L, X] = cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + auto [L, X] = lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); BOOST_CHECK(X.trange() == A.trange()); BOOST_CHECK(L.trange() == A.trange()); // first, test against LAPACK auto [L_lapack, X_lapack] = - lapack::cholesky_lsolve(TransposeFlag::NoTranspose, A, A); + lapack::cholesky_lsolve(TA::TransposeFlag::NoTranspose, A, A); decltype(L) L_error; L_error("i,j") = L("i,j") - L_lapack("i,j"); BOOST_CHECK_SMALL(L_error("i,j").norm().get(), @@ -681,10 +693,8 @@ BOOST_AUTO_TEST_CASE(sca_chol_lsolve) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_lu_solve) { +BOOST_AUTO_TEST_CASE(lu_solve) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -694,7 +704,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { return this->make_ta_reference(t, range); }); - auto iden = lu_solve(ref_ta, ref_ta); + auto iden = lapack::lu_solve(ref_ta, ref_ta); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -709,16 +719,17 @@ BOOST_AUTO_TEST_CASE(sca_lu_solve) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_solve(ref_ta, ref_ta), epsilon); GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_lu_inv) { +BOOST_AUTO_TEST_CASE(lu_inv) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -730,7 +741,7 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { TA::TArray iden(*GlobalFixture::world, trange); - auto Ainv = lu_inv(ref_ta); + auto Ainv = lapack::lu_inv(ref_ta); iden("i,j") = Ainv("i,k") * ref_ta("k,j"); BOOST_CHECK(iden.trange() == ref_ta.trange()); @@ -746,17 +757,18 @@ BOOST_AUTO_TEST_CASE(sca_lu_inv) { } }); + double epsilon = N * N * std::numeric_limits::epsilon(); double norm = iden("i,j").norm(*GlobalFixture::world).get(); - BOOST_CHECK_SMALL(norm, N * N * std::numeric_limits::epsilon()); + + BOOST_CHECK_SMALL(norm, epsilon); + TILEDARRAY_SCALAPACK_TEST(lu_inv(ref_ta), epsilon); GlobalFixture::world->gop.fence(); } #if 1 -BOOST_AUTO_TEST_CASE(sca_svd_values_only) { +BOOST_AUTO_TEST_CASE(svd_values_only) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -766,7 +778,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { return this->make_ta_reference(t, range); }); - auto S = svd(ref_ta, trange, trange); + auto S = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -779,10 +791,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_values_only) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { +BOOST_AUTO_TEST_CASE(svd_leftvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -792,7 +802,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { return this->make_ta_reference(t, range); }); - auto [S, U] = svd(ref_ta, trange, trange); + auto [S, U] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -805,10 +815,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_leftvectors) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { +BOOST_AUTO_TEST_CASE(svd_rightvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -818,7 +826,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { return this->make_ta_reference(t, range); }); - auto [S, VT] = svd(ref_ta, trange, trange); + auto [S, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(), @@ -831,10 +839,8 @@ BOOST_AUTO_TEST_CASE(sca_svd_rightvectors) { GlobalFixture::world->gop.fence(); } -BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { +BOOST_AUTO_TEST_CASE(svd_allvectors) { GlobalFixture::world->gop.fence(); - auto [M, N] = ref_matrix.dims(); - BOOST_REQUIRE_EQUAL(M, N); auto trange = gen_trange(N, {128ul}); @@ -844,7 +850,7 @@ BOOST_AUTO_TEST_CASE(sca_svd_allvectors) { return this->make_ta_reference(t, range); }); - auto [S, U, VT] = svd(ref_ta, trange, trange); + auto [S, U, VT] = lapack::svd(ref_ta, trange, trange); std::vector exact_singular_values = exact_evals; std::sort(exact_singular_values.begin(), exact_singular_values.end(),