diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ed72df3e72..594d64bc6b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -13,6 +13,7 @@ variables: ${TA_PYTHON} ${ENABLE_CUDA} ${BLA_VENDOR} + ${BLA_THREADS} ${ENABLE_SCALAPACK} before_script: @@ -37,8 +38,9 @@ ubuntu: ${TA_CONFIG} ${TA_TARGETS} MPIEXEC_PREFLAGS='--bind-to;none;--allow-run-as-root' - blacs_LIBRARIES=scalapack-openmpi - scalapack_LIBRARIES=scalapack-openmpi + ScaLAPACK_LIBRARIES="scalapack-openmpi;lapack;blas;MPI::MPI_C" + #blacs_LIBRARIES=scalapack-openmpi + #scalapack_LIBRARIES=scalapack-openmpi #lapack_LIBRARIES=lapack artifacts: paths: @@ -53,7 +55,8 @@ ubuntu: - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] CXX: [ g++ ] BUILD_TYPE : [ "Release" ] - BLA_VENDOR : [ "BLA_VENDOR=Intel10_64lp_seq", "BLA_VENDOR=Intel10_64lp" ] + BLA_VENDOR : [ "BLAS_PREFERENCE_LIST=IntelMKL" ] + BLA_THREADS : [ "intelmkl_PREFERED_THREAD_LEVEL=tbb" ] # ENABLE_SCALAPACK : [ "ENABLE_SCALAPACK=ON", "ENABLE_SCALAPACK=OFF" ] TA_PYTHON : [ "TA_PYTHON=OFF" ] # needs to be fixed for MKL - IMAGE : [ "ubuntu:18.04", "ubuntu:20.04" ] @@ -64,4 +67,4 @@ ubuntu: CXX: [ g++ ] BUILD_TYPE : [ "Release", "Debug" ] ENABLE_CUDA : [ "ENABLE_CUDA=ON" ] - TA_TARGETS : [ "tiledarray examples" ] \ No newline at end of file + TA_TARGETS : [ "tiledarray examples" ] diff --git a/CMakeLists.txt b/CMakeLists.txt index 56cf6d8ea4..1ea39e6160 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,9 @@ cmake_minimum_required (VERSION 3.15.0) # need list(PREPEND for toolchains +# Preload versions/tags of all dependencies ==================================== +include(external/versions.cmake) + ############################################################################### # Bring ValeevGroup cmake toolkit ############################################################################### @@ -34,6 +37,7 @@ FetchContent_Populate( vg_cmake_kit QUIET GIT_REPOSITORY https://github.com/ValeevGroup/kit-cmake.git + GIT_TAG ${TA_TRACKED_VGCMAKEKIT_TAG} 
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/vg BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/vg-build SUBBUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/cmake/vg-subbuild @@ -93,9 +97,6 @@ include(CMakeDependentOption) include(CMakePackageConfigHelpers) include(FeatureSummary) -# Preload versions/tags of all dependencies ==================================== -include(external/versions.cmake) - set(MPI_CXX_SKIP_MPICXX TRUE CACHE BOOL "MPI_CXX_SKIP_MPICXX") # Configure options ======================================================= @@ -105,6 +106,9 @@ add_feature_info(MPI ENABLE_MPI "Message-Passing Interface supports distributed- option(ENABLE_SCALAPACK "Enable ScaLAPACK Bindings in TiledArray" OFF) add_feature_info(ScaLAPACK ENABLE_SCALAPACK "ScaLAPACK provides distributed linear algebra") +option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON) +add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules") + redefaultable_option(ENABLE_TBB "Enable use of TBB with MADNESS" OFF) add_feature_info(TBB ENABLE_TBB "Intel Thread-Building Blocks (TBB) supports programming shared-memory systems") @@ -286,6 +290,36 @@ endif() include(external/madness.cmake) detect_MADNESS_configuration() include(external/eigen.cmake) + +###### discover linear algebra + +# use NWChemEx/David's linear algebra discovery modules? 
+# - yes => Invoke first to configure the correct libraries config and run modules to find BLAS/LAPACK/ScaLAPACK(if needed) +# - no => BLAS/LAPACK will be discovered by BLAS++/LAPACK++ (loaded by BTAS) which use standard CMake modules or +# their custom modules; if needed, ScaLAPACK will be discovered by BLACS++ +if (ENABLE_WFN91_LINALG_DISCOVERY_KIT) + include("${vg_cmake_kit_SOURCE_DIR}/modules/FetchWfn91LinAlgModules.cmake") + if(ENABLE_SCALAPACK) + find_package(ScaLAPACK REQUIRED) + # Propagate ScaLAPACK -> BLAS/LAPACK if not set + # (ScaLAPACK necessarily contains a BLAS/LAPACK linker by standard) + # TODO: Tell David to write a macro that hides this verbosity from user space + if( NOT BLAS_LIBRARIES ) + set( BLAS_LIBRARIES "${ScaLAPACK_LIBRARIES}" CACHE STRING "BLAS LIBRARIES" ) + endif() + if( NOT LAPACK_LIBRARIES ) + set( LAPACK_LIBRARIES "${ScaLAPACK_LIBRARIES}" CACHE STRING "LAPACK LIBRARIES" ) + endif() + else(ENABLE_SCALAPACK) + find_package(LAPACK REQUIRED) + # Propagate LAPACK -> BLAS if not set + # (LAPACK necessarily contains a BLAS linker by standard) + # TODO: Tell David to write a macro that hides this verbosity from user space + if( NOT BLAS_LIBRARIES ) + set( BLAS_LIBRARIES "${LAPACK_LIBRARIES}" CACHE STRING "BLAS LIBRARIES" ) + endif() + endif(ENABLE_SCALAPACK) +endif(ENABLE_WFN91_LINALG_DISCOVERY_KIT) # BTAS does a better job of building and checking Boost since it uses Boost::serialization # it also memorized the location of its config for use from install tree include(FindOrFetchBTAS) diff --git a/INSTALL.md b/INSTALL.md index 0195f8527e..0c2ee12eb4 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -215,7 +215,7 @@ to use TiledArray in a distributed memory environment. Note, if you build MADNESS yourself, you must also configure MADNESS with `ENABLE_MPI=OFF` to enable this option. 
-## BLAS/LAPACK +## Linear Algebra: BLAS/LAPACK/ScaLAPACK Even for basic operation TiledArray requires a serial BLAS implementation, either by linking with a serial version of the BLAS library or by setting the number of threads to one @@ -230,26 +230,68 @@ set the environment variables (e.g. `OMP_NUM_THREADS`, `MKL_NUM_THREADS`, etc.) to ensure single-threaded execution of BLAS/LAPACK kernels as needed. - As of version 1.0 TiledArray also provides a direct (non-iterative) linear solvers API implemented using LAPACK and (optionally) ScaLAPACK. Therefore LAPACK is now a mandatory -prerequisite of TiledArray - -BLAS/LAPACK dependencies are provided by the BTAS library, which in turn uses BLAS++/LAPACK++ -C++ linear algebra packages to discover the BLAS and LAPACK libraries at configure time. -The most common scenario is where TiledArray will configure and compile BTAS dependency -and its BLAS++/LAPACK++ prerequisites from source (this is strongly recommended). The following -CMake variables can be used to control how BLAS/LAPACK discovery occurs: - -* `BLA_STATIC` -- indicates whether static or shared LAPACK and BLAS libraries will be preferred. -* `BLA_VENDOR` -- controls which vendor BLAS/LAPACK library will be sought +prerequisite of TiledArray. The use of ScaLAPACK can be enabled by setting CMake cache +variable `ENABLE_SCALAPACK` to `ON`. + +Robust discovery of linear algebra libraries, and _especially_ their distributed-memory +variants, is a complex process. Unfortunately even for serial/shared-memory linear +algebra libraries only basic scenarios are supported by the standard CMake modules +(e.g., [BLAS](https://cmake.org/cmake/help/latest/module/FindBLAS.html) and +[LAPACK](https://cmake.org/cmake/help/latest/module/FindLAPACK.html)). 
+There are several discovery mechanisms available for robust discovery of linear +algebra in TA: +- By specifying the `BLAS_LIBRARIES`, `LAPACK_LIBRARIES`, and (if `ENABLE_SCALAPACK` is on) + `ScaLAPACK_LIBRARIES` CMake cache variables via CMake command line or via a toolchain. + Doing this overrides all other mechanisms of discovery described below and is recommended + if the discovery fails for some reason. To help with setting these variables for specific + platforms consider using toolchain files from + [the Valeev Group CMake kit](https://github.com/ValeevGroup/kit-cmake/tree/master/toolchains) (see examples above). +- The default discovery method utilizes [the recently developed linear algebra discovery kit](https://github.com/wavefunction91/cmake-modules) developed by + [David Williams-Young](https://github.com/wavefunction91) and co-workers + for the [NWChemEx project](https://github.com/NWChemEx-Project). The + discovery modules will override the standard CMake modules for BLAS and LAPACK, + and provide modules to discover BLACS and ScaLAPACK. These modules + will then be invoked to discover the linear algebra libraries as robustly as + feasible. The following CMake cache variables can be used to control the behavior + of the NWChemEx discovery kit: + - `{BLAS,LAPACK,ScaLAPACK}_PREFERS_STATIC`: if set to on, will prefer to link the corresponding component statically. + - `{BLAS,LAPACK,ScaLAPACK}_PREFERENCE_LIST`: these specify the variants of the corresponding libraries to search, + in the order of preference. 
The following are permitted values in these lists: + - `ReferenceBLAS`: NETLIB reference implementations + - `IntelMKL`: Intel Math Kernel Library + - `IBMESSL`: IBM Engineering and Scientific Subroutine Library + - `BLIS`: BLAS-Like Instantiation Software + - `OpenBLAS`: OpenBLAS + - `Accelerate`: Apple's Accelerate framework + - `FLAME`: (LAPACK-only) [libFLAME](https://www.cs.utexas.edu/~flame/web/libFLAME.html) + *N.B.* These differ from the recognized values of the `BLA_VENDOR` variable used by the [BLAS+LAPACK CMake modules](https://cmake.org/cmake/help/latest/module/FindBLAS.html). +- If the use of the NWChemEx kit is disabled by setting CMake cache variable `ENABLE_WFN91_LINALG_DISCOVERY_KIT` to `OFF` + BLAS/LAPACK are imported transitively via the BLAS++/LAPACK++ libraries (which are themselves + imported transitively via the BTAS library). Under the most common scenario, where TiledArray + will configure and compile BTAS dependency and its BLAS++/LAPACK++ prerequisites from source + (this is strongly recommended), BLAS/LAPACK will thus be discovered and imported by + BLAS++/LAPACK++ during the TA configuration. There are 2 mechanisms by which BLAS++/LAPACK++ + discover BLAS/LAPACK: + - _the built-in custom discovery kit_; no options exist to provide any control + - standard CMake BLAS/LAPACK modules. + + The latter is used if CMake cache variable `BLA_VENDOR` is specified: + - `BLA_VENDOR` -- controls which vendor BLAS/LAPACK library will be sought (see [CMake docs](https://cmake.org/cmake/help/latest/module/FindLAPACK.html)); by default all possible vendor libraries will be considered. E.g., to force the use of the Accelerate framework on MacOS use `-DBLA_VENDOR=Apple`. -More information can be found in the installation instructions for -[BLAS++](https://icl.bitbucket.io/blaspp/md__i_n_s_t_a_l_l.html) and -[LAPACK++](https://icl.bitbucket.io/lapackpp/md__i_n_s_t_a_l_l.html). 
+ More information can be found in the installation instructions for + [BLAS++](https://icl.bitbucket.io/blaspp/md__i_n_s_t_a_l_l.html) and + [LAPACK++](https://icl.bitbucket.io/lapackpp/md__i_n_s_t_a_l_l.html). + + Note that BLAS++/LAPACK++ discover BLAS and LAPACK only; ScaLAPACK + library is always discovered using the NWChemEx kit. + +Also note that all discovery methods respect the following CMake cache variable: +- `BLA_STATIC` -- indicates whether static or shared LAPACK and BLAS libraries will be preferred. Additional platform-specific BLAS/LAPACK notes are listed below. @@ -257,11 +299,19 @@ Additional platform-specific BLAS/LAPACK notes are listed below. Intel MKL is a freely-available collection of high-performance libraries that implements BLAS, LAPACK, and ScaLAPACK APIs. MKL is complex: it supports both serial kernels as well as parallel kernels that can take advantage of multiple cores via the use of OpenMP and Intel TBB (the [Intel OneAPI toolkit](https://software.intel.com/oneapi) provides MKL also capable of execution on some Intel GPUs and FPGAs), and the [necessary MKL link options](https://software.intel.com/sites/products/mkl/mkl_link_line_advisor.htm) will depend on the compiler, OS, and other details. -Fortunately, Intel MKL can be discovered by BLAS++/LAPACK++ automatically in most instances; if needed, specifying `BLA_VENDOR` with [appropriate argument](https://cmake.org/cmake/help/latest/module/FindBLAS.html#input-variables) can be used to force TiledArray to use MKL. Unfortunately it is not possible to specify the use of TBB-based backend for MKL without the use of a toolchain file. All MKL-enabled toolchains in [The Valeev Group CMake kit](https://github.com/ValeevGroup/kit-cmake/tree/master/toolchains) can be used to configure TiledArray to use sequential, OpenMP, or TBB backend by setting the `MKL_THREADING` CMake cache variable to `SEQ`, `OMP`, or `TBB`, respectively. 
The toolchains also respect the user-provided choice of `BLA_STATIC`. If multiple MKL versions are present on your system, specify the apropriate variant of the library by loading the corresponding `mklvars.sh` script to set environment variables `MKLROOT` and, if necessary, `LD_LIBRARY_PATH`/`DYLD_LIBRARY_PATH`. +To discover and configure the use of Intel MKL consider these suggestions: +- The use of NWChemEx discovery kit is strongly recommended for discovering Intel MKL. The following CMake cache variables can be used to specify the desired Intel MKL configuration: + - `intelmkl_PREFERS_STATIC`: whether to look for static or shared/dynamic libraries (default = `OFF`) + - `intelmkl_PREFERED_THREAD_LEVEL`: which threading backend to use, supported values are `sequential`, `openmp`, and `tbb` (default = `openmp`) + - `intelmkl_PREFERED_THREAD_LIBRARY`: which thread library to use, supported values are `intel`, `gnu`, and `pgi` (default depends on the compiler) -Also note that even if OpenMP or TBB backends are used, TiledArray will be default set the number of threads to be used by MKL kernels to 1, regardless of the value of environment variables `MKL_NUM_THREADS`/`OMP_NUM_THREADS`. It is possible to change the number of threads to be used programmatically in your application by calling MKL function `mkl_set_num_threads()`. +- Most common configurations of Intel MKL can also be discovered by BLAS++/LAPACK++ automatically; if needed, specifying `BLA_VENDOR` with [appropriate argument](https://cmake.org/cmake/help/latest/module/FindBLAS.html#input-variables) can be used to force TiledArray to use MKL. Unfortunately it is not possible to specify the use of TBB-based backend for MKL without the use of a toolchain file. 
+ +- All MKL-enabled toolchains in [The Valeev Group CMake kit](https://github.com/ValeevGroup/kit-cmake/tree/master/toolchains) can be used to configure TiledArray to use sequential, OpenMP, or TBB backend by setting the `MKL_THREADING` CMake cache variable to `SEQ`, `OMP`, or `TBB`, respectively. The toolchains also respect the user-provided choice of `BLA_STATIC`. If multiple MKL versions are present on your system, specify the appropriate variant of the library by loading the corresponding `mklvars.sh` script to set environment variables `MKLROOT` and, if necessary, `LD_LIBRARY_PATH`/`DYLD_LIBRARY_PATH`. + + On 64-bit platforms it is possible to specify whether to use 32-bit (`LP64`, the default) or 64-bit (`ILP64`) integers in BLAS/LAPACK API. To choose the `ILP64` interface when using the VG MKL toolchains set CMake cache variable `INTEGER4` to `OFF`; the same is achieved when using the default BLAS/LAPACK detection by setting `BLA_VENDOR` to [one of the valid `Intel*64ilp*` choices](https://cmake.org/cmake/help/latest/module/FindBLAS.html#input-variables). N.B. Currently `ILP64` variant of BLACS/ScaLAPACK is not supported, due to [a pending issue](https://github.com/wavefunction91/blacspp/issues/5). -On 64-bit platforms it is possible to specify whether to use 32-bit (`LP64`, the default) or 64-bit (`ILP64`) integers in BLAS/LAPACK API. To choose the `ILP64` interface when using the VG MKL toolchains set CMake cache variable `INTEGER4` to `OFF`; the same is achieved when using the default BLAS/LAPACK detection by setting `BLA_VENDOR` to [one of the valid `Intel*64ilp*` choices](https://cmake.org/cmake/help/latest/module/FindBLAS.html#input-variables). N.B. Currently `ILP64` variant of BLACS/ScaLAPACK is not supported, due to [a pending issue](https://github.com/wavefunction91/blacspp/issues/5). 
+Also note that even if OpenMP or TBB backends are used, TiledArray will by default set the number of threads to be used by MKL kernels to 1, regardless of the value of environment variables `MKL_NUM_THREADS`/`OMP_NUM_THREADS`. It is possible to change the number of threads to be used programmatically in your application by calling MKL function `mkl_set_num_threads()`. ## CUDA @@ -304,11 +354,12 @@ the correct revision of MADNESS. The following CMake options may be used to modify build behavior or find MADNESS: * `ENABLE_MPI` -- Enable MPI [Default=ON] -* `ENABLE_SCALAPACK` -- Enable use of ScaLAPACK bindings [Default=OFF] +* `ENABLE_SCALAPACK` -- Enable the use of ScaLAPACK bindings [Default=OFF] * `ENABLE_TBB` -- Enable the use of TBB when building MADNESS [Default=ON] * `ENABLE_GPERFTOOLS` -- Enable the use of gperftools when building MADNESS [Default=OFF] * `ENABLE_TCMALLOC_MINIMAL` -- Enable the use of gperftool's tcmalloc_minimal library only (the rest of gperftools is skipped) when building MADNESS [Default=OFF] * `ENABLE_LIBUNWIND` -- Force the discovery of libunwind library when building MADNESS [Default=OFF] +* `ENABLE_WFN91_LINALG_DISCOVERY_KIT` -- Enable the use of NWChemEx's linear algebra discovery kit [Default=ON] * `MADNESS_SOURCE_DIR` -- Path to the MADNESS source directory * `MADNESS_BINARY_DIR` -- Path to the MADNESS build directory * `MADNESS_URL` -- Path to the MADNESS repository [Default=MADNESS git repository] diff --git a/ci/.build-project b/ci/.build-project index 7b6f4d0b3a..79a08d541b 100755 --- a/ci/.build-project +++ b/ci/.build-project @@ -15,7 +15,7 @@ while [ $# -gt 0 ]; do -*) echo "Invalid option $arg"; exit 1 ;; CXX=*) eval "export $1" ;; *=*) vars+="\"-D$1\" " - # NB unset vars from s.t. CMake doesn't see env vars, eg BLA_VENDOR + # NB unset vars from s.t. 
CMake doesn't see env vars, eg BLA_VENDOR unset $(echo "$1" | cut -d= -f1) ;; *) targets+="$1 ";; @@ -33,8 +33,8 @@ xtime="/usr/bin/time" if [ -n "$metrics" ]; then #sudo apt install time if [ ! -x $xtime ]; then - echo >&2 "${xtime} is not executable, metrics disabled" - metrics="" + echo >&2 "${xtime} is not executable, metrics disabled" + metrics="" fi rm -f "${metrics}" fi @@ -44,9 +44,9 @@ function time_cmd { cmd="$@" echo "+ $cmd" if [ -n "$metrics" ]; then - format="'${step}.memory %Mk\n${step}.time %E\n'" - cmd="${xtime} -f ${format} -a -o $metrics $cmd" - #echo "$cmd" + format="'${step}.memory %Mk\n${step}.time %E\n'" + cmd="${xtime} -f ${format} -a -o $metrics $cmd" + #echo "$cmd" fi eval "$cmd" } @@ -73,7 +73,7 @@ section_end host_system_info section_start "preparing_system_section[collapsed=true]" "Preparing system" cmd "source ci/openmpi.env" cmd "echo 'localhost slots=2' > /etc/openmpi/openmpi-default-hostfile" -if [[ "$vars" =~ \"-DBLA_VENDOR=Intel ]]; then +if [[ "$vars" =~ \"-DBLAS_PREFERENCE_LIST=IntelMKL ]]; then cmd "make -C /home/ValeevGroup install/intel-mkl" cmd "source /opt/intel/mkl/bin/mklvars.sh intel64" cmd "echo MKLROOT=\$MKLROOT" @@ -94,4 +94,3 @@ for target in ${targets}; do time_cmd ${target} "cmake --build ${build_dir} --target ${target}" section_end build_${target}_section done - diff --git a/external/scalapack.cmake b/external/scalapack.cmake index e7f619419a..5820534ab4 100644 --- a/external/scalapack.cmake +++ b/external/scalapack.cmake @@ -16,40 +16,21 @@ if( TARGET scalapackpp::scalapackpp ) else() message(STATUS "Could not find scalapackpp! Building..." 
) + include(FetchContent) - include( DownloadProject ) - download_project( - PROJ blacspp - GIT_REPOSITORY https://github.com/wavefunction91/blacspp.git - GIT_TAG ${TA_TRACKED_BLACSPP_TAG} - PREFIX ${PROJECT_BINARY_DIR}/external - UPDATE_DISCONNECTED 1 - ) - download_project( - PROJ scalapackpp + FetchContent_Declare( scalapackpp GIT_REPOSITORY https://github.com/wavefunction91/scalapackpp.git GIT_TAG ${TA_TRACKED_SCALAPACKPP_TAG} - PREFIX ${PROJECT_BINARY_DIR}/external - UPDATE_DISCONNECTED 1 ) - - if( DEFINED SCALAPACK_LIBRARIES ) - set( scalapack_LIBRARIES ${SCALAPACK_LIBRARIES} ) - set( blacs_LIBRARIES ${SCALAPACK_LIBRARIES} ) - endif() - - set( BLACSPP_ENABLE_TESTS OFF ) - set( SCALAPACKPP_ENABLE_TESTS OFF ) - add_subdirectory( ${blacspp_SOURCE_DIR} ${blacspp_BINARY_DIR} ) - add_subdirectory( ${scalapackpp_SOURCE_DIR} ${scalapackpp_BINARY_DIR} ) + FetchContent_MakeAvailable( scalapackpp ) # propagate MPI_CXX_SKIP_MPICXX=ON - target_compile_definitions( blacspp PRIVATE ${MPI_CXX_COMPILE_DEFINITIONS} ) + target_compile_definitions( blacspp PRIVATE ${MPI_CXX_COMPILE_DEFINITIONS} ) target_compile_definitions( scalapackpp PRIVATE ${MPI_CXX_COMPILE_DEFINITIONS} ) install( TARGETS blacspp scalapackpp EXPORT tiledarray COMPONENT tiledarray ) # Add these dependencies to External - add_dependencies(External-tiledarray scalapackpp blacspp) + add_dependencies(External-tiledarray scalapackpp ) # set {blacspp,scalapackpp}_CONFIG to the install location so that we know where to find it set(blacspp_CONFIG ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/cmake/blacspp/blacspp-config.cmake) diff --git a/external/versions.cmake b/external/versions.cmake index 90c9fcccd8..81c9a36f81 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -1,6 +1,8 @@ # for each dependency track both current and previous id (the variable for the latter must contain PREVIOUS) # to be able to auto-update them +set(TA_TRACKED_VGCMAKEKIT_TAG 7d6fc0f790588eba4ea5006eccff86bb3d3903fe) + # 
Boost explicitly downgraded to 1.59 from 1.68 set(TA_TRACKED_BOOST_VERSION 1.59) set(TA_TRACKED_BOOST_PREVIOUS_VERSION 1.68) @@ -31,11 +33,11 @@ set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3) set(TA_TRACKED_UMPIRE_TAG f04abd1dd038c84262915a493d8f78576bb80fd0) set(TA_TRACKED_UMPIRE_PREVIOUS_TAG f04abd1dd038c84262915a493d8f78576bb80fd0) -set(TA_TRACKED_BLACSPP_TAG 20cfd414c5b719be1c958f4a2d57abef06df83b6 ) -set(TA_TRACKED_BLACSPP_PREVIOUS_TAG da4ada57e578cf944325a7152164306742551596 ) +#set(TA_TRACKED_BLACSPP_TAG 20cfd414c5b719be1c958f4a2d57abef06df83b6 ) +#set(TA_TRACKED_BLACSPP_PREVIOUS_TAG da4ada57e578cf944325a7152164306742551596 ) -set(TA_TRACKED_SCALAPACKPP_TAG 534deef123b91494c7718b30f00d191fc4c7ede1 ) -set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 534deef123b91494c7718b30f00d191fc4c7ede1 ) +set(TA_TRACKED_SCALAPACKPP_TAG 1152672fc1852e5cfbd6510e3c621e0981a207d6 ) +set(TA_TRACKED_SCALAPACKPP_PREVIOUS_TAG 1152672fc1852e5cfbd6510e3c621e0981a207d6 ) set(TA_TRACKED_RANGEV3_TAG 2e0591c57fce2aca6073ad6e4fdc50d841827864) set(TA_TRACKED_RANGEV3_PREVIOUS_TAG dbdaa247a25a0daa24c68f1286a5693c72ea0006) diff --git a/src/TiledArray/math/linalg/rank-local.h b/src/TiledArray/math/linalg/rank-local.h index 87d4b44a56..228eb1cc66 100644 --- a/src/TiledArray/math/linalg/rank-local.h +++ b/src/TiledArray/math/linalg/rank-local.h @@ -6,6 +6,8 @@ #include #include +#include + #include namespace TiledArray::math::linalg::rank_local { diff --git a/src/TiledArray/math/linalg/scalapack/block_cyclic.h b/src/TiledArray/math/linalg/scalapack/block_cyclic.h index 7c36ec0869..4ca4e31c4e 100644 --- a/src/TiledArray/math/linalg/scalapack/block_cyclic.h +++ b/src/TiledArray/math/linalg/scalapack/block_cyclic.h @@ -40,7 +40,7 @@ #include #include -#include +#include namespace TiledArray::math::linalg::scalapack { diff --git a/src/TiledArray/math/linalg/scalapack/cholesky.h b/src/TiledArray/math/linalg/scalapack/cholesky.h index 545015e8a9..b3a82ddc36 100644 --- 
a/src/TiledArray/math/linalg/scalapack/cholesky.h +++ b/src/TiledArray/math/linalg/scalapack/cholesky.h @@ -73,12 +73,12 @@ auto cholesky(const Array& A, TiledRange l_trange = TiledRange(), auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + auto info = scalapackpp::ppotrf(blacspp::Uplo::Lower, N, matrix.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - zero_triangle(blacspp::Triangle::Upper, matrix); + zero_triangle(blacspp::Uplo::Upper, matrix); if (l_trange.rank() == 0) l_trange = A.trange(); @@ -129,12 +129,12 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange(), auto [Mloc, Nloc] = matrix.dist().get_local_dims(N, N); auto desc = matrix.dist().descinit_noerror(N, N, Mloc); - auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + auto info = scalapackpp::ppotrf(blacspp::Uplo::Lower, N, matrix.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("Cholesky Failed"); // Zero out the upper triangle - zero_triangle(blacspp::Triangle::Upper, matrix); + zero_triangle(blacspp::Uplo::Upper, matrix); // Copy L if needed std::shared_ptr> L_sca = nullptr; @@ -146,7 +146,7 @@ auto cholesky_linv(const Array& A, TiledRange l_trange = TiledRange(), // Compute inverse info = - scalapackpp::ptrtri(blacspp::Triangle::Lower, blacspp::Diagonal::NonUnit, + scalapackpp::ptrtri(blacspp::Uplo::Lower, blacspp::Diag::NonUnit, N, matrix.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("TRTRI Failed"); @@ -200,7 +200,7 @@ auto cholesky_solve(const Array& A, const Array& B, desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); } - auto info = scalapackpp::pposv(blacspp::Triangle::Lower, N, NRHS, + auto info = scalapackpp::pposv(blacspp::Uplo::Lower, N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("Cholesky 
Solve Failed"); @@ -250,18 +250,18 @@ auto cholesky_lsolve(Op trans, const Array& A, const Array& B, desc_b = B_sca.dist().descinit_noerror(N, NRHS, Mloc); } - auto info = scalapackpp::ppotrf(blacspp::Triangle::Lower, N, + auto info = scalapackpp::ppotrf(blacspp::Uplo::Lower, N, A_sca.local_mat().data(), 1, 1, desc_a); if (info) TA_EXCEPTION("Cholesky Failed"); info = scalapackpp::ptrtrs( - blacspp::Triangle::Lower, to_scalapackpp_transposeflag(trans), - blacspp::Diagonal::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, + blacspp::Uplo::Lower, to_scalapackpp_transposeflag(trans), + blacspp::Diag::NonUnit, N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, B_sca.local_mat().data(), 1, 1, desc_b); if (info) TA_EXCEPTION("TRTRS Failed"); // Zero out the upper triangle - zero_triangle(blacspp::Triangle::Upper, A_sca); + zero_triangle(blacspp::Uplo::Upper, A_sca); if (l_trange.rank() == 0) l_trange = A.trange(); if (x_trange.rank() == 0) x_trange = B.trange(); diff --git a/src/TiledArray/math/linalg/scalapack/heig.h b/src/TiledArray/math/linalg/scalapack/heig.h index d6a6d26984..bc9edeaa91 100644 --- a/src/TiledArray/math/linalg/scalapack/heig.h +++ b/src/TiledArray/math/linalg/scalapack/heig.h @@ -80,7 +80,7 @@ auto heig(const Array& A, TiledRange evec_trange = TiledRange(), scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, + scalapackpp::Job::Vec, blacspp::Uplo::Lower, N, matrix.local_mat().data(), 1, 1, desc, evals.data(), evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); @@ -150,7 +150,7 @@ auto heig(const ArrayA& A, const ArrayB& B, scalapack::BlockCyclicMatrix evecs(world, grid, N, N, NB, NB); auto info = scalapackpp::hereig_gen( - scalapackpp::VectorFlag::Vectors, blacspp::Triangle::Lower, N, + scalapackpp::Job::Vec, blacspp::Uplo::Lower, N, A_sca.local_mat().data(), 1, 1, desc, B_sca.local_mat().data(), 1, 1, desc, 
evals.data(), evecs.local_mat().data(), 1, 1, desc); if (info) TA_EXCEPTION("EVP Failed"); diff --git a/src/TiledArray/math/linalg/scalapack/lu.h b/src/TiledArray/math/linalg/scalapack/lu.h index 56b06e8f38..ecf0421086 100644 --- a/src/TiledArray/math/linalg/scalapack/lu.h +++ b/src/TiledArray/math/linalg/scalapack/lu.h @@ -67,7 +67,7 @@ auto lu_solve(const ArrayA& A, const ArrayB& B, auto [B_Mloc, B_Nloc] = B_sca.dist().get_local_dims(N, NRHS); auto desc_b = B_sca.dist().descinit_noerror(N, NRHS, B_Mloc); - std::vector IPIV(A_Mloc + MB); + std::vector IPIV(A_Mloc + MB); auto info = scalapackpp::pgesv(N, NRHS, A_sca.local_mat().data(), 1, 1, desc_a, @@ -104,7 +104,7 @@ auto lu_inv(const Array& A, TiledRange ainv_trange = TiledRange(), auto [A_Mloc, A_Nloc] = A_sca.dist().get_local_dims(N, N); auto desc_a = A_sca.dist().descinit_noerror(N, N, A_Mloc); - std::vector IPIV(A_Mloc + MB); + std::vector IPIV(A_Mloc + MB); { auto info = scalapackpp::pgetrf(N, N, A_sca.local_mat().data(), 1, 1, diff --git a/src/TiledArray/math/linalg/scalapack/svd.h b/src/TiledArray/math/linalg/scalapack/svd.h index 29315777d8..dc68d374c5 100644 --- a/src/TiledArray/math/linalg/scalapack/svd.h +++ b/src/TiledArray/math/linalg/scalapack/svd.h @@ -94,14 +94,14 @@ auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, std::shared_ptr> U = nullptr, VT = nullptr; - scalapackpp::VectorFlag JOBU = scalapackpp::VectorFlag::NoVectors; - scalapackpp::VectorFlag JOBVT = scalapackpp::VectorFlag::NoVectors; + scalapackpp::Job JOBU = scalapackpp::Job::NoVec; + scalapackpp::Job JOBVT = scalapackpp::Job::NoVec; value_type* U_ptr = nullptr; value_type* VT_ptr = nullptr; if constexpr (need_u) { - JOBU = scalapackpp::VectorFlag::Vectors; + JOBU = scalapackpp::Job::Vec; U = std::make_shared>( world, grid, M, SVD_SIZE, MB, NB); @@ -109,7 +109,7 @@ auto svd(const Array& A, TiledRange u_trange, TiledRange vt_trange, } if constexpr (need_vt) { - JOBVT = scalapackpp::VectorFlag::Vectors; + JOBVT = 
scalapackpp::Job::Vec; VT = std::make_shared>( world, grid, SVD_SIZE, N, MB, NB); diff --git a/src/TiledArray/math/linalg/scalapack/util.h b/src/TiledArray/math/linalg/scalapack/util.h index 7c9c7abd89..8e0fb9b603 100644 --- a/src/TiledArray/math/linalg/scalapack/util.h +++ b/src/TiledArray/math/linalg/scalapack/util.h @@ -33,21 +33,21 @@ namespace TiledArray::math::linalg::scalapack { -inline scalapackpp::TransposeFlag to_scalapackpp_transposeflag(Op t) { +inline scalapackpp::Op to_scalapackpp_transposeflag(Op t) { switch (t) { case Op::NoTrans: - return scalapackpp::TransposeFlag::NoTranspose; + return scalapackpp::Op::NoTrans; case Op::Trans: - return scalapackpp::TransposeFlag::Transpose; + return scalapackpp::Op::Trans; case Op::ConjTrans: - return scalapackpp::TransposeFlag::ConjTranspose; + return scalapackpp::Op::ConjTrans; default: abort(); } } template -void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, +void zero_triangle(blacspp::Uplo tri, scalapack::BlockCyclicMatrix& A, bool zero_diag = false) { auto zero_el = [&](size_t I, size_t J) { if (A.dist().i_own(I, J)) { @@ -59,7 +59,7 @@ void zero_triangle(blacspp::Triangle tri, scalapack::BlockCyclicMatrix& A, auto [M, N] = A.dims(); // Zero the lower triangle - if (tri == blacspp::Triangle::Lower) { + if (tri == blacspp::Uplo::Lower) { if (zero_diag) for (size_t j = 0; j < N; ++j) for (size_t i = j; i < M; ++i) zero_el(i, j);