diff --git a/external/versions.cmake b/external/versions.cmake index 8edd1ac577..cf0b91c61b 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -36,7 +36,7 @@ set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0) #set(TA_TRACKED_BLACSPP_TAG 20cfd414c5b719be1c958f4a2d57abef06df83b6 ) #set(TA_TRACKED_BLACSPP_PREVIOUS_TAG da4ada57e578cf944325a7152164306742551596 ) -set(TA_TRACKED_SCALAPACKPP_TAG 043f85d7f31ec6009740ab466bcb5008af7b0814 ) +set(TA_TRACKED_SCALAPACKPP_TAG bf17a7246af38d34523bd0099b01d9961d06d311 ) set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 043f85d7f31ec6009740ab466bcb5008af7b0814 ) set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98cf62b9c2..933b260694 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,12 +64,14 @@ TiledArray/math/linalg/util.h TiledArray/math/linalg/cholesky.h TiledArray/math/linalg/heig.h TiledArray/math/linalg/lu.h +TiledArray/math/linalg/qr.h TiledArray/math/linalg/svd.h TiledArray/math/linalg/scalapack/util.h TiledArray/math/linalg/scalapack/block_cyclic.h TiledArray/math/linalg/scalapack/cholesky.h TiledArray/math/linalg/scalapack/heig.h TiledArray/math/linalg/scalapack/lu.h +TiledArray/math/linalg/scalapack/qr.h TiledArray/math/linalg/scalapack/svd.h TiledArray/conversions/btas.h TiledArray/conversions/clone.h diff --git a/src/TiledArray/math/linalg.h b/src/TiledArray/math/linalg.h index c6dc015e7a..62f90eb5ec 100644 --- a/src/TiledArray/math/linalg.h +++ b/src/TiledArray/math/linalg.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #endif // TILEDARRAY_MATH_LINALG_LINALG_H__INCLUDED diff --git a/src/TiledArray/math/linalg/non-distributed/qr.h b/src/TiledArray/math/linalg/non-distributed/qr.h new file mode 100644 index 0000000000..e43cec632d --- /dev/null +++ b/src/TiledArray/math/linalg/non-distributed/qr.h @@ -0,0 +1,42 @@ +#ifndef TILEDARRAY_MATH_LINALG_NON_DISTRIBUTED_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_NON_DISTRIBUTED_QR_H__INCLUDED + +#include + +#include +#include +#include + +namespace TiledArray::math::linalg::non_distributed { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange() ) { + + (void)detail::array_traits{}; + auto& world = V.world(); + auto V_eig = detail::make_matrix(V); + decltype(V_eig) R_eig; + if( !world.rank() ) { + linalg::rank_local::householder_qr( V_eig, R_eig ); + } + world.gop.broadcast_serializable( V_eig, 0 ); + if(q_trange.rank() == 0) q_trange = V.trange(); + auto Q = eigen_to_array( world, q_trange, V_eig ); + if constexpr (not QOnly) { + world.gop.broadcast_serializable( R_eig, 0 ); + if (r_trange.rank() == 0) { + // Generate a TRange based on column tiling of V + auto col_tiling = V.trange().dim(1); + r_trange = TiledRange( {col_tiling, col_tiling} ); + } + auto R = eigen_to_array( world, r_trange, R_eig ); + return std::make_tuple( Q, R ); + } else { + return Q; + } +} + +} // namespace TiledArray::math::linalg::non_distributed + +#endif diff --git a/src/TiledArray/math/linalg/qr.h b/src/TiledArray/math/linalg/qr.h new file mode 100644 index 0000000000..1683e6792c --- /dev/null +++ b/src/TiledArray/math/linalg/qr.h @@ -0,0 +1,53 @@ +#ifndef TILEDARRAY_MATH_LINALG_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_QR_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +#include +#endif +#include +#include + +#include + +namespace TiledArray::math::linalg { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange() ) { + TA_MAX_THREADS; +#if TILEDARRAY_HAS_SCALAPACK + if (V.world().size() > 1 && V.elements_range().volume() > 10000000) { + return scalapack::householder_qr( V, q_trange, r_trange ); + } +#endif + return non_distributed::householder_qr( V, q_trange, r_trange ); +} + +template +auto cholesky_qr( const ArrayV& V, TiledRange r_trange = TiledRange() ) { + TA_MAX_THREADS; + // Form Grammian + ArrayV G; G("i,j") = V("k,i").conj() * V("k,j"); + + // Obtain Cholesky L and its inverse + auto [L, Linv] = cholesky_linv( G, r_trange ); + + // Q = V * L**-H + ArrayV Q; Q("i,j") = V("i,k") * Linv("j,k").conj(); + + if constexpr (not QOnly) { + // R = L**H + ArrayV R; R("i,j") = L("j,i"); + return std::make_tuple( Q, R ); + } else return Q; + +} + +} // namespace TiledArray::math::linalg + +namespace TiledArray { + using TiledArray::math::linalg::householder_qr; + using TiledArray::math::linalg::cholesky_qr; +} // namespace TiledArray +#endif diff --git a/src/TiledArray/math/linalg/rank-local.cpp b/src/TiledArray/math/linalg/rank-local.cpp index 5413ad0e5f..becea61bd4 100644 --- a/src/TiledArray/math/linalg/rank-local.cpp +++ b/src/TiledArray/math/linalg/rank-local.cpp @@ -207,6 +207,34 @@ void lu_inv(Matrix& A) { TA_LAPACK(getri, n, a, lda, ipiv.data()); } +template +void householder_qr( Matrix &V, Matrix &R ) { + integer m = V.rows(); + integer n = V.cols(); + integer k = std::min(m,n); + integer ldv = V.rows(); // Col Major + T* v = V.data(); + std::vector tau(k); + lapack::geqrf( m, n, v, ldv, tau.data() ); + + // Extract R + if constexpr ( not QOnly ) { + // Resize R just in case + R.resize(k,n); + R.fill(0.); + // Extract Upper triangle into R + integer ldr = R.rows(); + T* r = R.data(); + lapack::lacpy( lapack::MatrixType::Upper, k, n, v, ldv, r, ldr ); + } + + // Explicitly form Q + // TODO: This is wrong for complex, but it doesn't look like R/C is caught + // anywhere else either... + lapack::orgqr( m, n, k, v, ldv, tau.data() ); + +} + #define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ template void cholesky(MATRIX&); \ template void cholesky_linv(MATRIX&); \ @@ -216,7 +244,9 @@ void lu_inv(Matrix& A) { template void heig(MATRIX&, MATRIX&, VECTOR&); \ template void svd(Job,Job,MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ template void lu_solve(MATRIX&, MATRIX&); \ - template void lu_inv(MATRIX&); + template void lu_inv(MATRIX&); \ + template void householder_qr(MATRIX&,MATRIX&); \ + template void householder_qr(MATRIX&,MATRIX&); TA_LAPACK_EXPLICIT(Matrix, std::vector); TA_LAPACK_EXPLICIT(Matrix, std::vector); diff --git a/src/TiledArray/math/linalg/rank-local.h b/src/TiledArray/math/linalg/rank-local.h index 228eb1cc66..6af9622a32 100644 --- a/src/TiledArray/math/linalg/rank-local.h +++ b/src/TiledArray/math/linalg/rank-local.h @@ -54,6 +54,9 @@ void lu_solve(Matrix &A, Matrix &B); template void lu_inv(Matrix &A); +template +void householder_qr( Matrix &V, Matrix &R ); + } // namespace TiledArray::math::linalg::rank_local #endif // TILEDARRAY_MATH_LINALG_RANK_LOCAL_H__INCLUDED diff --git a/src/TiledArray/math/linalg/scalapack/qr.h b/src/TiledArray/math/linalg/scalapack/qr.h new file mode 100644 index 0000000000..e948aeb5cc --- /dev/null +++ b/src/TiledArray/math/linalg/scalapack/qr.h @@ -0,0 +1,85 @@ +#ifndef TILEDARRAY_MATH_LINALG_SCALAPACK_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SCALAPACK_QR_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK + +#include + +#include +#include +#include + +namespace TiledArray::math::linalg::scalapack { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { + + using value_type = typename ArrayV::element_type; + + auto& world = V.world(); + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto V_sca = scalapack::array_to_block_cyclic(V, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = V_sca.dims(); + auto K = std::min(M,N); + auto [V_Mloc, V_Nloc] = V_sca.dist().get_local_dims(M, N); + auto desc_v = V_sca.dist().descinit_noerror(M, N, V_Mloc); + + + std::vector + TAU_local( scalapackpp::local_col_from_desc( K, desc_v ) ); + + // Perform QR factorization -> Obtain reflectors + R in UT + auto info = scalapackpp::pgeqrf( M, N, V_sca.local_mat().data(), 1, 1, desc_v, TAU_local.data() ); + if(info) TA_EXCEPTION("GEQRF FAILED"); + + ArrayV R; // Uninitialized R matrix + + if constexpr (not QOnly) { + BlockCyclicMatrix R_sca( world, grid, K, N, MB, NB ); + auto [R_Mloc, R_Nloc] = R_sca.dist().get_local_dims(K, N); + auto desc_r = R_sca.dist().descinit_noerror(K, N, R_Mloc); + + // Extract R from the upper triangle of V + R_sca.local_mat().fill(0.); + scalapackpp::placpy( scalapackpp::Uplo::Upper, K, N, + V_sca.local_mat().data(), 1, 1, desc_v, + R_sca.local_mat().data(), 1, 1, desc_r ); + + if (r_trange.rank() == 0) { + // Generate a TRange based on column tiling of V + auto col_tiling = V.trange().dim(1); + r_trange = TiledRange( {col_tiling, col_tiling} ); + } + + world.gop.fence(); + R = scalapack::block_cyclic_to_array( R_sca, r_trange ); + world.gop.fence(); + } + + // Generate Q + info = scalapackpp::generate_q_householder( M, N, K, V_sca.local_mat().data(), 1, 1, desc_v, + TAU_local.data() ); + if(info) TA_EXCEPTION("GENQ FAILED"); + + if(q_trange.rank() == 0) q_trange = V.trange(); + world.gop.fence(); + auto Q = scalapack::block_cyclic_to_array( V_sca, q_trange ); + world.gop.fence(); + + if constexpr (QOnly) return Q; + else return std::make_tuple( Q, R ); +} + +} // namespace TiledArray::math::linalg::scalapack + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // HEADER GUARD diff --git a/tests/linalg.cpp b/tests/linalg.cpp index de9139e081..2f9d54b5bf 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -888,4 +888,168 @@ BOOST_AUTO_TEST_CASE(svd_allvectors) { } #endif +template +void householder_qr_q_only_test( const ArrayT& A, double tol ) { + + using value_type = typename ArrayT::element_type; + + #if TILEDARRAY_HAS_SCALAPACK + auto Q = use_scalapack ? scalapack::householder_qr( A ) : + non_dist:: householder_qr( A ); + #else + static_assert(not use_scalapack); + auto Q = non_dist::householder_qr( A ); + #endif + + + // Make sure the Q is orthogonal at least + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + +} + +template +void householder_qr_test( const ArrayT& A, double tol ) { + + #if TILEDARRAY_HAS_SCALAPACK + auto [Q,R] = use_scalapack ? scalapack::householder_qr( A ) : + non_dist:: householder_qr( A ); + #else + static_assert(not use_scalapack); + auto [Q,R] = non_dist::householder_qr( A ); + #endif + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + + // Check orthonormality of Q + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); +} + +BOOST_AUTO_TEST_CASE(householder_qr_q_only) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + double tol = N * N * std::numeric_limits::epsilon(); + householder_qr_q_only_test( ref_ta, tol ); + #if TILEDARRAY_HAS_SCALAPACK + householder_qr_q_only_test( ref_ta, tol ); + #endif + + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(householder_qr) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + + double tol = N * N * std::numeric_limits::epsilon(); + householder_qr_test( ref_ta, tol ); + #if TILEDARRAY_HAS_SCALAPACK + householder_qr_test( ref_ta, tol ); + #endif + + GlobalFixture::world->gop.fence(); +} + + + + + + +template +void cholesky_qr_q_only_test( const ArrayT& A, double tol ) { + + using value_type = typename ArrayT::element_type; + + auto Q = TiledArray::math::linalg::cholesky_qr( A ); + + + // Make sure the Q is orthogonal at least + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + +} + +template +void cholesky_qr_test( const ArrayT& A, double tol ) { + + auto [Q,R] = TiledArray::math::linalg::cholesky_qr( A ); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + + // Check orthonormality of Q + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_q_only_test( ref_ta, tol ); + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_test( ref_ta, tol ); + + GlobalFixture::world->gop.fence(); +} + BOOST_AUTO_TEST_SUITE_END()