From a7f303b39d42e67b9a21f1f18268ad0d177efe33 Mon Sep 17 00:00:00 2001 From: David Williams-Young Date: Thu, 14 Oct 2021 12:11:04 -0700 Subject: [PATCH 1/4] [LINALG] Added Householder QR API, rank-local implementation (ICL) + associated UT --- src/CMakeLists.txt | 2 + src/TiledArray/math/linalg.h | 1 + .../math/linalg/non-distributed/qr.h | 42 +++++++++++++++ src/TiledArray/math/linalg/qr.h | 31 +++++++++++ src/TiledArray/math/linalg/rank-local.cpp | 32 +++++++++++- src/TiledArray/math/linalg/rank-local.h | 3 ++ tests/linalg.cpp | 52 +++++++++++++++++++ 7 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 src/TiledArray/math/linalg/non-distributed/qr.h create mode 100644 src/TiledArray/math/linalg/qr.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98cf62b9c2..933b260694 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,12 +64,14 @@ TiledArray/math/linalg/util.h TiledArray/math/linalg/cholesky.h TiledArray/math/linalg/heig.h TiledArray/math/linalg/lu.h +TiledArray/math/linalg/qr.h TiledArray/math/linalg/svd.h TiledArray/math/linalg/scalapack/util.h TiledArray/math/linalg/scalapack/block_cyclic.h TiledArray/math/linalg/scalapack/cholesky.h TiledArray/math/linalg/scalapack/heig.h TiledArray/math/linalg/scalapack/lu.h +TiledArray/math/linalg/scalapack/qr.h TiledArray/math/linalg/scalapack/svd.h TiledArray/conversions/btas.h TiledArray/conversions/clone.h diff --git a/src/TiledArray/math/linalg.h b/src/TiledArray/math/linalg.h index c6dc015e7a..62f90eb5ec 100644 --- a/src/TiledArray/math/linalg.h +++ b/src/TiledArray/math/linalg.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #endif // TILEDARRAY_MATH_LINALG_LINALG_H__INCLUDED diff --git a/src/TiledArray/math/linalg/non-distributed/qr.h b/src/TiledArray/math/linalg/non-distributed/qr.h new file mode 100644 index 0000000000..ea0bcd0ad4 --- /dev/null +++ b/src/TiledArray/math/linalg/non-distributed/qr.h @@ -0,0 +1,42 @@ +#ifndef TILEDARRAY_MATH_LINALG_NON_DISTRIBUTED_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_NON_DISTRIBUTED_QR_H__INCLUDED + +#include + +#include +#include +#include + +namespace TiledArray::math::linalg::non_distributed { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange() ) { + + (void)detail::array_traits{}; + auto& world = V.world(); + auto V_eig = detail::make_matrix(V); + decltype(V_eig) R_eig; + if( !world.rank() ) { + linalg::rank_local::householder_qr( V_eig, R_eig ); + } + world.gop.broadcast_serializable( V_eig, 0 ); + if(q_trange.rank() == 0) q_trange = V.trange(); + auto Q = eigen_to_array( world, q_trange, V_eig ); + if constexpr (not QOnly) { + world.gop.broadcast_serializable( R_eig, 0 ); + if (r_trange.rank() == 0) { + // Generate a TRange based on column tiling of V + auto col_tiling = V.trange().dim(1); + r_trange = TiledRange( {col_tiling, col_tiling} ); + } + auto R = eigen_to_array( world, r_trange, R_eig ); + return std::make_tuple( Q, R ); + } else { + return Q; + } +} + +} // namespace TiledArray::math::linalg::non_distributed + +#endif diff --git a/src/TiledArray/math/linalg/qr.h b/src/TiledArray/math/linalg/qr.h new file mode 100644 index 0000000000..4893e7ab2c --- /dev/null +++ b/src/TiledArray/math/linalg/qr.h @@ -0,0 +1,31 @@ +#ifndef TILEDARRAY_MATH_LINALG_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_QR_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK +//#include +#endif +#include +#include + +namespace TiledArray::math::linalg { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange() ) { + TA_MAX_THREADS; +#if TILEDARRAY_HAS_SCALAPACK + if (V.world().size() > 1 && V.elements_range().volume() > 10000000) { + TA_EXCEPTION("ScaLAPACK Householder QR NYI"); + //return scalapack::householder_qr( V, q_trange, r_trange ); + } +#endif + return non_distributed::householder_qr( V, q_trange, r_trange ); +} + +} // namespace TiledArray::math::linalg + +namespace TiledArray { + using TiledArray::math::linalg::householder_qr; +} // namespace TiledArray +#endif diff --git a/src/TiledArray/math/linalg/rank-local.cpp b/src/TiledArray/math/linalg/rank-local.cpp index 5413ad0e5f..becea61bd4 100644 --- a/src/TiledArray/math/linalg/rank-local.cpp +++ b/src/TiledArray/math/linalg/rank-local.cpp @@ -207,6 +207,34 @@ void lu_inv(Matrix& A) { TA_LAPACK(getri, n, a, lda, ipiv.data()); } +template +void householder_qr( Matrix &V, Matrix &R ) { + integer m = V.rows(); + integer n = V.cols(); + integer k = std::min(m,n); + integer ldv = V.rows(); // Col Major + T* v = V.data(); + std::vector tau(k); + lapack::geqrf( m, n, v, ldv, tau.data() ); + + // Extract R + if constexpr ( not QOnly ) { + // Resize R just in case + R.resize(k,n); + R.fill(0.); + // Extract Upper triangle into R + integer ldr = R.rows(); + T* r = R.data(); + lapack::lacpy( lapack::MatrixType::Upper, k, n, v, ldv, r, ldr ); + } + + // Explicitly form Q + // TODO: This is wrong for complex, but it doesn't look like R/C is caught + // anywhere else either... + lapack::orgqr( m, n, k, v, ldv, tau.data() ); + +} + #define TA_LAPACK_EXPLICIT(MATRIX, VECTOR) \ template void cholesky(MATRIX&); \ template void cholesky_linv(MATRIX&); \ @@ -216,7 +244,9 @@ void lu_inv(Matrix& A) { template void heig(MATRIX&, MATRIX&, VECTOR&); \ template void svd(Job,Job,MATRIX&, VECTOR&, MATRIX*, MATRIX*); \ template void lu_solve(MATRIX&, MATRIX&); \ - template void lu_inv(MATRIX&); + template void lu_inv(MATRIX&); \ + template void householder_qr(MATRIX&,MATRIX&); \ + template void householder_qr(MATRIX&,MATRIX&); TA_LAPACK_EXPLICIT(Matrix, std::vector); TA_LAPACK_EXPLICIT(Matrix, std::vector); diff --git a/src/TiledArray/math/linalg/rank-local.h b/src/TiledArray/math/linalg/rank-local.h index 228eb1cc66..6af9622a32 100644 --- a/src/TiledArray/math/linalg/rank-local.h +++ b/src/TiledArray/math/linalg/rank-local.h @@ -54,6 +54,9 @@ void lu_solve(Matrix &A, Matrix &B); template void lu_inv(Matrix &A); +template +void householder_qr( Matrix &V, Matrix &R ); + } // namespace TiledArray::math::linalg::rank_local #endif // TILEDARRAY_MATH_LINALG_RANK_LOCAL_H__INCLUDED diff --git a/tests/linalg.cpp b/tests/linalg.cpp index de9139e081..762573f823 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -888,4 +888,56 @@ BOOST_AUTO_TEST_CASE(svd_allvectors) { } #endif +BOOST_AUTO_TEST_CASE(householder_qr_q_only) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + auto Q = non_dist::householder_qr( ref_ta ); + + // Make sure the Q is orthogonal at least + TA::TArray Iden( *GlobalFixture::world, trange ); + Iden("i,j") = Q("k,i") * Q("k,j"); + auto I_eig = TA::array_to_eigen(Iden); + double tol = N * N * std::numeric_limits::epsilon(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(householder_qr) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + auto [Q,R] = non_dist::householder_qr( ref_ta ); + + double tol = N * N * std::numeric_limits::epsilon(); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = ref_ta("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + + // Check orthonormality of Q + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + auto I_eig = TA::array_to_eigen(Iden); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + + GlobalFixture::world->gop.fence(); +} + BOOST_AUTO_TEST_SUITE_END() From bbcfddb3d5193202415c73f78fc9c609f8c35734 Mon Sep 17 00:00:00 2001 From: David Williams-Young Date: Fri, 15 Oct 2021 09:17:38 -0700 Subject: [PATCH 2/4] [LINALG] Added ScaLAPACK QR --- .../math/linalg/non-distributed/qr.h | 2 +- src/TiledArray/math/linalg/qr.h | 9 +- src/TiledArray/math/linalg/scalapack/qr.h | 85 +++++++++++++++++++ tests/linalg.cpp | 64 +++++++++----- 4 files changed, 135 insertions(+), 25 deletions(-) create mode 100644 src/TiledArray/math/linalg/scalapack/qr.h diff --git a/src/TiledArray/math/linalg/non-distributed/qr.h b/src/TiledArray/math/linalg/non-distributed/qr.h index ea0bcd0ad4..e43cec632d 100644 --- a/src/TiledArray/math/linalg/non-distributed/qr.h +++ b/src/TiledArray/math/linalg/non-distributed/qr.h @@ -9,7 +9,7 @@ namespace TiledArray::math::linalg::non_distributed { -template +template auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), TiledRange r_trange = TiledRange() ) { diff --git a/src/TiledArray/math/linalg/qr.h b/src/TiledArray/math/linalg/qr.h index 4893e7ab2c..deba834416 100644 --- a/src/TiledArray/math/linalg/qr.h +++ b/src/TiledArray/math/linalg/qr.h @@ -3,24 +3,23 @@ #include #if TILEDARRAY_HAS_SCALAPACK -//#include +#include #endif #include #include namespace TiledArray::math::linalg { -template +template auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), TiledRange r_trange = TiledRange() ) { TA_MAX_THREADS; #if TILEDARRAY_HAS_SCALAPACK if (V.world().size() > 1 && V.elements_range().volume() > 10000000) { - TA_EXCEPTION("ScaLAPACK Householder QR NYI"); - //return scalapack::householder_qr( V, q_trange, r_trange ); + return scalapack::householder_qr( V, q_trange, r_trange ); } #endif - return non_distributed::householder_qr( V, q_trange, r_trange ); + return non_distributed::householder_qr( V, q_trange, r_trange ); } } // namespace TiledArray::math::linalg diff --git a/src/TiledArray/math/linalg/scalapack/qr.h b/src/TiledArray/math/linalg/scalapack/qr.h new file mode 100644 index 0000000000..e948aeb5cc --- /dev/null +++ b/src/TiledArray/math/linalg/scalapack/qr.h @@ -0,0 +1,85 @@ +#ifndef TILEDARRAY_MATH_LINALG_SCALAPACK_QR_H__INCLUDED +#define TILEDARRAY_MATH_LINALG_SCALAPACK_QR_H__INCLUDED + +#include +#if TILEDARRAY_HAS_SCALAPACK + +#include + +#include +#include +#include + +namespace TiledArray::math::linalg::scalapack { + +template +auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), + TiledRange r_trange = TiledRange(), + size_t NB = default_block_size(), + size_t MB = default_block_size()) { + + using value_type = typename ArrayV::element_type; + + auto& world = V.world(); + auto world_comm = world.mpi.comm().Get_mpi_comm(); + blacspp::Grid grid = blacspp::Grid::square_grid(world_comm); + + world.gop.fence(); // stage ScaLAPACK execution + auto V_sca = scalapack::array_to_block_cyclic(V, grid, MB, NB); + world.gop.fence(); // stage ScaLAPACK execution + + auto [M, N] = V_sca.dims(); + auto K = std::min(M,N); + auto [V_Mloc, V_Nloc] = V_sca.dist().get_local_dims(M, N); + auto desc_v = V_sca.dist().descinit_noerror(M, N, V_Mloc); + + + std::vector + TAU_local( scalapackpp::local_col_from_desc( K, desc_v ) ); + + // Perform QR factorization -> Obtain reflectors + R in UT + auto info = scalapackpp::pgeqrf( M, N, V_sca.local_mat().data(), 1, 1, desc_v, TAU_local.data() ); + if(info) TA_EXCEPTION("GEQRF FAILED"); + + ArrayV R; // Uninitialized R matrix + + if constexpr (not QOnly) { + BlockCyclicMatrix R_sca( world, grid, K, N, MB, NB ); + auto [R_Mloc, R_Nloc] = R_sca.dist().get_local_dims(K, N); + auto desc_r = R_sca.dist().descinit_noerror(K, N, R_Mloc); + + // Extract R from the upper triangle of V + R_sca.local_mat().fill(0.); + scalapackpp::placpy( scalapackpp::Uplo::Upper, K, N, + V_sca.local_mat().data(), 1, 1, desc_v, + R_sca.local_mat().data(), 1, 1, desc_r ); + + if (r_trange.rank() == 0) { + // Generate a TRange based on column tiling of V + auto col_tiling = V.trange().dim(1); + r_trange = TiledRange( {col_tiling, col_tiling} ); + } + + world.gop.fence(); + R = scalapack::block_cyclic_to_array( R_sca, r_trange ); + world.gop.fence(); + } + + // Generate Q + info = scalapackpp::generate_q_householder( M, N, K, V_sca.local_mat().data(), 1, 1, desc_v, + TAU_local.data() ); + if(info) TA_EXCEPTION("GENQ FAILED"); + + if(q_trange.rank() == 0) q_trange = V.trange(); + world.gop.fence(); + auto Q = scalapack::block_cyclic_to_array( V_sca, q_trange ); + world.gop.fence(); + + if constexpr (QOnly) return Q; + else return std::make_tuple( Q, R ); +} + +} // namespace TiledArray::math::linalg::scalapack + +#endif // TILEDARRAY_HAS_SCALAPACK +#endif // HEADER GUARD diff --git a/tests/linalg.cpp b/tests/linalg.cpp index 762573f823..0d78bd3ee0 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -888,6 +888,42 @@ BOOST_AUTO_TEST_CASE(svd_allvectors) { } #endif +template +void householder_qr_q_only_test( const ArrayT& A, double tol ) { + + using value_type = typename ArrayT::element_type; + + auto Q = use_scalapack ? scalapack::householder_qr( A ) : + non_dist:: householder_qr( A ); + + + // Make sure the Q is orthogonal at least + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + +} + +template +void householder_qr_test( const ArrayT& A, double tol ) { + + auto [Q,R] = use_scalapack ? scalapack::householder_qr( A ) : + non_dist:: householder_qr( A ); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + + // Check orthonormality of Q + TA::TArray Iden; + Iden("i,j") = Q("k,i") * Q("k,j"); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); +} + BOOST_AUTO_TEST_CASE(householder_qr_q_only) { GlobalFixture::world->gop.fence(); @@ -899,14 +935,12 @@ BOOST_AUTO_TEST_CASE(householder_qr_q_only) { return this->make_ta_reference(t, range); }); - auto Q = non_dist::householder_qr( ref_ta ); - - // Make sure the Q is orthogonal at least - TA::TArray Iden( *GlobalFixture::world, trange ); - Iden("i,j") = Q("k,i") * Q("k,j"); - auto I_eig = TA::array_to_eigen(Iden); double tol = N * N * std::numeric_limits::epsilon(); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + householder_qr_q_only_test( ref_ta, tol ); + #if TILEDARRAY_HAS_SCALAPACK + householder_qr_q_only_test( ref_ta, tol ); + #endif + GlobalFixture::world->gop.fence(); } @@ -922,20 +956,12 @@ BOOST_AUTO_TEST_CASE(householder_qr) { return this->make_ta_reference(t, range); }); - auto [Q,R] = non_dist::householder_qr( ref_ta ); double tol = N * N * std::numeric_limits::epsilon(); - - // Check reconstruction error - TA::TArray QR_ERROR; - QR_ERROR("i,j") = ref_ta("i,j") - Q("i,k") * R("k,j"); - BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); - - // Check orthonormality of Q - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); - auto I_eig = TA::array_to_eigen(Iden); - BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + householder_qr_test( ref_ta, tol ); + #if TILEDARRAY_HAS_SCALAPACK + householder_qr_test( ref_ta, tol ); + #endif GlobalFixture::world->gop.fence(); } From 4296ceac900741f216ff8195dca89df5aa0cb7ba Mon Sep 17 00:00:00 2001 From: David Williams-Young Date: Fri, 15 Oct 2021 12:03:37 -0700 Subject: [PATCH 3/4] [LINALG] Add Generic Cholesky QR implementation + bump scalapackpp + fix parallel UTs for QR --- external/versions.cmake | 2 +- src/TiledArray/math/linalg/qr.h | 23 ++++++++++ tests/linalg.cpp | 80 ++++++++++++++++++++++++++++++++- 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/external/versions.cmake b/external/versions.cmake index 8edd1ac577..cf0b91c61b 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -36,7 +36,7 @@ set(TA_TRACKED_UMPIRE_PREVIOUS_TAG v6.0.0) #set(TA_TRACKED_BLACSPP_TAG 20cfd414c5b719be1c958f4a2d57abef06df83b6 ) #set(TA_TRACKED_BLACSPP_PREVIOUS_TAG da4ada57e578cf944325a7152164306742551596 ) -set(TA_TRACKED_SCALAPACKPP_TAG 043f85d7f31ec6009740ab466bcb5008af7b0814 ) +set(TA_TRACKED_SCALAPACKPP_TAG bf17a7246af38d34523bd0099b01d9961d06d311 ) set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 043f85d7f31ec6009740ab466bcb5008af7b0814 ) set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864) diff --git a/src/TiledArray/math/linalg/qr.h b/src/TiledArray/math/linalg/qr.h index deba834416..1683e6792c 100644 --- a/src/TiledArray/math/linalg/qr.h +++ b/src/TiledArray/math/linalg/qr.h @@ -8,6 +8,8 @@ #include #include +#include + namespace TiledArray::math::linalg { template @@ -22,9 +24,30 @@ auto householder_qr( const ArrayV& V, TiledRange q_trange = TiledRange(), return non_distributed::householder_qr( V, q_trange, r_trange ); } +template +auto cholesky_qr( const ArrayV& V, TiledRange r_trange = TiledRange() ) { + TA_MAX_THREADS; + // Form Grammian + ArrayV G; G("i,j") = V("k,i").conj() * V("k,j"); + + // Obtain Cholesky L and its inverse + auto [L, Linv] = cholesky_linv( G, r_trange ); + + // Q = V * L**-H + ArrayV Q; Q("i,j") = V("i,k") * Linv("j,k").conj(); + + if constexpr (not QOnly) { + // R = L**H + ArrayV R; R("i,j") = L("j,i"); + return std::make_tuple( Q, R ); + } else return Q; + +} + } // namespace TiledArray::math::linalg namespace TiledArray { using TiledArray::math::linalg::householder_qr; + using TiledArray::math::linalg::cholesky_qr; } // namespace TiledArray #endif diff --git a/tests/linalg.cpp b/tests/linalg.cpp index 0d78bd3ee0..d828b4df07 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -899,6 +899,7 @@ void householder_qr_q_only_test( const ArrayT& A, double tol ) { // Make sure the Q is orthogonal at least TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); @@ -917,8 +918,8 @@ void householder_qr_test( const ArrayT& A, double tol ) { BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); // Check orthonormality of Q - TA::TArray Iden; - Iden("i,j") = Q("k,i") * Q("k,j"); + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); auto I_eig = TA::array_to_eigen(Iden); const auto N = A.trange().dim(1).extent(); BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); @@ -966,4 +967,79 @@ BOOST_AUTO_TEST_CASE(householder_qr) { GlobalFixture::world->gop.fence(); } + + + + + +template +void cholesky_qr_q_only_test( const ArrayT& A, double tol ) { + + using value_type = typename ArrayT::element_type; + + auto Q = TiledArray::math::linalg::cholesky_qr( A ); + + + // Make sure the Q is orthogonal at least + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); + +} + +template +void cholesky_qr_test( const ArrayT& A, double tol ) { + + auto [Q,R] = TiledArray::math::linalg::cholesky_qr( A ); + + // Check reconstruction error + TA::TArray QR_ERROR; + QR_ERROR("i,j") = A("i,j") - Q("i,k") * R("k,j"); + BOOST_CHECK_SMALL( QR_ERROR("i,j").norm().get(), tol ); + + // Check orthonormality of Q + TA::TArray Iden; Iden("i,j") = Q("k,i") * Q("k,j"); + Iden.make_replicated(); + auto I_eig = TA::array_to_eigen(Iden); + const auto N = A.trange().dim(1).extent(); + BOOST_CHECK_SMALL( (I_eig - decltype(I_eig)::Identity(N,N)).norm(), tol ); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr_q_only) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_q_only_test( ref_ta, tol ); + + GlobalFixture::world->gop.fence(); +} + +BOOST_AUTO_TEST_CASE(cholesky_qr) { + GlobalFixture::world->gop.fence(); + + auto trange = gen_trange(N, {128ul}); + + auto ref_ta = TA::make_array>( + *GlobalFixture::world, trange, + [this](TA::Tensor& t, TA::Range const& range) -> double { + return this->make_ta_reference(t, range); + }); + + + double tol = N * N * std::numeric_limits::epsilon(); + cholesky_qr_test( ref_ta, tol ); + + GlobalFixture::world->gop.fence(); +} + BOOST_AUTO_TEST_SUITE_END() From 0e00701e17c236e862d427f4ff90969aa7217682 Mon Sep 17 00:00:00 2001 From: David Williams-Young Date: Fri, 15 Oct 2021 12:58:28 -0700 Subject: [PATCH 4/4] [LINALG] Fix non-ScaLAPACK compilation in QR UTs --- tests/linalg.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/linalg.cpp b/tests/linalg.cpp index d828b4df07..2f9d54b5bf 100644 --- a/tests/linalg.cpp +++ b/tests/linalg.cpp @@ -893,8 +893,13 @@ void householder_qr_q_only_test( const ArrayT& A, double tol ) { using value_type = typename ArrayT::element_type; + #if TILEDARRAY_HAS_SCALAPACK auto Q = use_scalapack ? scalapack::householder_qr( A ) : non_dist:: householder_qr( A ); + #else + static_assert(not use_scalapack); + auto Q = non_dist::householder_qr( A ); + #endif // Make sure the Q is orthogonal at least @@ -909,8 +914,13 @@ void householder_qr_q_only_test( const ArrayT& A, double tol ) { template void householder_qr_test( const ArrayT& A, double tol ) { + #if TILEDARRAY_HAS_SCALAPACK auto [Q,R] = use_scalapack ? scalapack::householder_qr( A ) : non_dist:: householder_qr( A ); + #else + static_assert(not use_scalapack); + auto [Q,R] = non_dist::householder_qr( A ); + #endif // Check reconstruction error TA::TArray QR_ERROR;