From b7f585ec60889c37e3cf45c865c771ce6be05ec7 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 28 Jul 2021 15:53:14 -0400 Subject: [PATCH 01/12] switch to Umpire host memory pool --- CMakeLists.txt | 1 + external/umpire.cmake | 22 +++-- src/CMakeLists.txt | 9 +- src/TiledArray/cuda/um_allocator.h | 66 ++----------- src/TiledArray/external/cuda.h | 15 +-- src/TiledArray/external/umpire.h | 139 ++++++++++++++++++++++++++++ src/TiledArray/fwd.h | 12 ++- src/TiledArray/host/allocator.h | 77 ++++++++++++++++ src/TiledArray/host/env.h | 143 +++++++++++++++++++++++++++++ src/TiledArray/tensor/tensor.h | 15 +-- 10 files changed, 413 insertions(+), 86 deletions(-) create mode 100644 src/TiledArray/external/umpire.h create mode 100644 src/TiledArray/host/allocator.h create mode 100644 src/TiledArray/host/env.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d1f37eb2b..45e8265589 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,6 +294,7 @@ endif() include(external/madness.cmake) detect_MADNESS_configuration() include(external/eigen.cmake) +include(external/umpire.cmake) ###### discover linear algebra diff --git a/external/umpire.cmake b/external/umpire.cmake index ad8632e2c0..271d374f6a 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -2,6 +2,8 @@ ## find Umpire ## +if (NOT TARGET TiledArray_UMPIRE) + find_path(_UMPIRE_INSTALL_DIR NAMES include/umpire/Umpire.hpp HINTS ${UMPIRE_INSTALL_DIR}) # if user provides UMPIRE, use it @@ -70,20 +72,24 @@ else() -DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS} -DCMAKE_AR=${CMAKE_AR} -DBLT_SOURCE_DIR=${BLT_SOURCE_DIR} - -DBLT_CXX_STD=c++${CMAKE_CUDA_STANDARD} - -DENABLE_CUDA=ON + -DBLT_CXX_STD=c++${CMAKE_CXX_STANDARD} -DENABLE_BENCHMARKS=OFF -DENABLE_OPENMP=OFF -DENABLE_TESTS=OFF -DENABLE_EXAMPLES=OFF -DENABLE_LOGGING=OFF -DENABLE_ASSERTS=${enable_umpire_asserts} - -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} - -DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD} - -DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS} - -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} - -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} ) + if (ENABLE_CUDA) + list(APPEND UMPIRE_CMAKE_ARGS + -DENABLE_CUDA=ON + -DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER} + -DCMAKE_CUDA_STANDARD=${CMAKE_CUDA_STANDARD} + -DCMAKE_CUDA_EXTENSIONS=${CMAKE_CUDA_EXTENSIONS} + -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} + -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} + ) + endif(ENABLE_CUDA) if (CMAKE_TOOLCHAIN_FILE) set(UMPIRE_CMAKE_ARGS "${UMPIRE_CMAKE_ARGS}" "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" @@ -160,3 +166,5 @@ set_target_properties( install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) #TODO test Umpire + +endif(NOT TARGET TiledArray_UMPIRE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e1ca7b7943..0f54a0348a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,7 +35,6 @@ TiledArray/dense_shape.h TiledArray/dist_array.h TiledArray/distributed_storage.h TiledArray/error.h -TiledArray/external/madness.h TiledArray/initialize.h TiledArray/perm_index.h TiledArray/permutation.h @@ -117,6 +116,10 @@ TiledArray/expressions/unary_engine.h TiledArray/expressions/unary_expr.h TiledArray/expressions/index_list.h TiledArray/external/btas.h +TiledArray/external/madness.h +TiledArray/external/umpire.h +TiledArray/host/env.h +TiledArray/host/allocator.h TiledArray/math/blas.h TiledArray/math/gemm_helper.h TiledArray/math/outer.h @@ -217,7 +220,7 @@ TiledArray/math/linalg/rank-local.cpp ) # the list of libraries on which TiledArray depends on, will be cached later -set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers) +set(_TILEDARRAY_DEPENDENCIES MADworld TiledArray_Eigen BTAS::BTAS blaspp_headers TiledArray_UMPIRE) # TODO better ways to handle tiledarray cuda dependency if(CUDA_FOUND) @@ -234,7 +237,7 @@ if(CUDA_FOUND) LANGUAGE CUDA) # the list of libraries on which TiledArray depends on - list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_UMPIRE TiledArray_CUTT) + list(APPEND _TILEDARRAY_DEPENDENCIES CUDA::cublas CUDA::nvToolsExt TiledArray_CUTT) endif(CUDA_FOUND) diff --git a/src/TiledArray/cuda/um_allocator.h b/src/TiledArray/cuda/um_allocator.h index 437527e28d..3bb40126a4 100644 --- a/src/TiledArray/cuda/um_allocator.h +++ b/src/TiledArray/cuda/um_allocator.h @@ -29,6 +29,7 @@ #ifdef TILEDARRAY_HAS_CUDA #include +#include #include #include @@ -38,49 +39,27 @@ namespace TiledArray { /// CUDA UM allocator, based on boilerplate by Howard Hinnant /// (https://howardhinnant.github.io/allocator_boilerplate.html) template -class cuda_um_allocator_impl { +class cuda_um_allocator_impl : public umpire_allocator { public: - using value_type = T; - using pointer = T*; - using reference = T&; - using const_reference = const T&; + using base_type = umpire_allocator; + using base_type; cuda_um_allocator_impl() noexcept - : um_dynamic_pool_(&cudaEnv::instance()->um_dynamic_pool()) {} + : base_type(&cudaEnv::instance()->um_dynamic_pool()) {} template cuda_um_allocator_impl(const cuda_um_allocator_impl& rhs) noexcept - : um_dynamic_pool_(rhs.um_dynamic_pool_) {} - - /// allocates um memory using umpire dynamic pool - pointer allocate(size_t n) { - pointer result = nullptr; - - TA_ASSERT(um_dynamic_pool_); - - result = static_cast(um_dynamic_pool_->allocate(n * sizeof(T))); - - return result; - } - - /// deallocate um memory using umpire dynamic pool - void deallocate(value_type* ptr, size_t) { - TA_ASSERT(um_dynamic_pool_); - um_dynamic_pool_->deallocate(ptr); - } + : base_type(static_cast&>(rhs)) {} template friend bool operator==(const cuda_um_allocator_impl& lhs, const cuda_um_allocator_impl& rhs) noexcept; - - private: - umpire::Allocator* um_dynamic_pool_; }; // class cuda_um_allocator template bool operator==(const cuda_um_allocator_impl& lhs, const cuda_um_allocator_impl& rhs) noexcept { - return lhs.um_dynamic_pool_ == rhs.um_dynamic_pool_; + return lhs.umpire_allocator() == rhs.umpire_allocator(); } template @@ -89,37 +68,6 @@ bool operator!=(const cuda_um_allocator_impl& lhs, return !(lhs == rhs); } -/// see -/// https://stackoverflow.com/questions/21028299/is-this-behavior-of-vectorresizesize-type-n-under-c11-and-boost-container/21028912#21028912 -template -class default_init_allocator : public A { - using a_t = std::allocator_traits; - - public: - using reference = typename A::reference; // std::allocator::reference - // deprecated in C++17, but thrust - // still relying on this - using const_reference = typename A::const_reference; // ditto - - template - struct rebind { - using other = - default_init_allocator>; - }; - - using A::A; - - template - void construct(U* ptr) noexcept( - std::is_nothrow_default_constructible::value) { - ::new (static_cast(ptr)) U; - } - template - void construct(U* ptr, Args&&... args) { - a_t::construct(static_cast(*this), ptr, std::forward(args)...); - } -}; - template using cuda_um_allocator = default_init_allocator>; diff --git a/src/TiledArray/external/cuda.h b/src/TiledArray/external/cuda.h index bf9e3171a1..2a7d325023 100644 --- a/src/TiledArray/external/cuda.h +++ b/src/TiledArray/external/cuda.h @@ -38,11 +38,7 @@ #include #include -// for memory management -#include -#include -#include -#include +#include #include #include @@ -209,11 +205,10 @@ class cudaEnv { } } - /// no copy constructor - cudaEnv(cudaEnv& cuda_global) = delete; - - /// no assignment constructor - cudaEnv operator=(cudaEnv& cuda_global) = delete; + cudaEnv(const cudaEnv&) = delete; + cudaEnv(cudaEnv&&) = delete; + cudaEnv& operator=(const cudaEnv&) = delete; + cudaEnv& operator=(cudaEnv&&) = delete; /// access to static member static std::unique_ptr& instance() { diff --git a/src/TiledArray/external/umpire.h b/src/TiledArray/external/umpire.h new file mode 100644 index 0000000000..a55a34b615 --- /dev/null +++ b/src/TiledArray/external/umpire.h @@ -0,0 +1,139 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2021 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * Jan 31, 2018 + * + */ + +#ifndef TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED +#define TILEDARRAY_EXTERNAL_UMPIRE_H___INCLUDED + +#include + +#include + +// for memory management +#include +#include +#include +#include + +#include +#include + +namespace TiledArray { + +/// wraps Umpire allocator into a standard-compliant allocator, +/// based on the boilerplate by Howard Hinnant +/// (https://howardhinnant.github.io/allocator_boilerplate.html) +template +class umpire_allocator_impl { + public: + using value_type = T; + using pointer = value_type*; + using const_pointer = + typename std::pointer_traits::template rebind; + using void_pointer = + typename std::pointer_traits::template rebind; + using const_void_pointer = + typename std::pointer_traits::template rebind; + + using reference = T&; + using const_reference = const T&; + + using difference_type = + typename std::pointer_traits::difference_type; + using size_type = std::make_unsigned_t; + + umpire_allocator_impl(umpire::Allocator* umpalloc) noexcept + : umpalloc_(umpalloc) {} + + template + umpire_allocator_impl(const umpire_allocator_impl& rhs) noexcept + : umpalloc_(rhs.umpalloc_) {} + + /// allocates um memory using umpire dynamic pool + pointer allocate(size_t n) { + pointer result = nullptr; + + TA_ASSERT(umpalloc_); + + result = static_cast(umpalloc_->allocate(n * sizeof(T))); + + return result; + } + + /// deallocate um memory using umpire dynamic pool + void deallocate(pointer ptr, size_t) { + TA_ASSERT(umpalloc_); + umpalloc_->deallocate(ptr); + } + + const umpire::Allocator* umpire_allocator() const { return umpalloc_; } + + private: + umpire::Allocator* umpalloc_; +}; // class umpire_allocator + +template +bool operator==(const umpire_allocator_impl& lhs, + const umpire_allocator_impl& rhs) noexcept { + return lhs.um_dynamic_pool() == rhs.um_dynamic_pool(); +} + +template +bool operator!=(const umpire_allocator_impl& lhs, + const umpire_allocator_impl& rhs) noexcept { + return !(lhs == rhs); +} + +/// see +/// https://stackoverflow.com/questions/21028299/is-this-behavior-of-vectorresizesize-type-n-under-c11-and-boost-container/21028912#21028912 +template +class default_init_allocator : public A { + using a_t = std::allocator_traits; + + public: + using reference = typename A::reference; // std::allocator::reference + // deprecated in C++17, but thrust + // still relying on this + using const_reference = typename A::const_reference; // ditto + + template + struct rebind { + using other = + default_init_allocator>; + }; + + using A::A; + + template + void construct(U* ptr) noexcept( + std::is_nothrow_default_constructible::value) { + ::new (static_cast(ptr)) U; + } + template + void construct(U* ptr, Args&&... args) { + a_t::construct(static_cast(*this), ptr, std::forward(args)...); + } +}; + +} // namespace TiledArray + +#endif // TILEDARRAY_CUDA_UM_ALLOCATOR_H___INCLUDED diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index b53626333e..20245d9d1f 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -33,6 +33,15 @@ template class aligned_allocator; } // namespace Eigen +namespace TiledArray { +template +class host_allocator_impl; +template +class default_init_allocator; +template +using host_allocator = default_init_allocator>; +} // namespace TiledArray + namespace madness { class World; } @@ -54,7 +63,8 @@ class SparsePolicy; // TiledArray Tensors template /* std::allocator */> + typename A = host_allocator< + T> /* or Eigen::aligned_allocator or std::allocator */> class Tensor; typedef Tensor TensorD; diff --git a/src/TiledArray/host/allocator.h b/src/TiledArray/host/allocator.h new file mode 100644 index 0000000000..efaaeff9c4 --- /dev/null +++ b/src/TiledArray/host/allocator.h @@ -0,0 +1,77 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2021 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Eduard Valeyev + * Department of Chemistry, Virginia Tech + * Jan 31, 2018 + * + */ + +#ifndef TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED +#define TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED + +#include + +#include +#include + +#include + +#include +#include + +namespace TiledArray { + +/// CUDA UM allocator, based on boilerplate by Howard Hinnant +/// (https://howardhinnant.github.io/allocator_boilerplate.html) +template +class host_allocator_impl : public umpire_allocator_impl { + public: + using base_type = umpire_allocator_impl; + using typename base_type::const_pointer; + using typename base_type::const_reference; + using typename base_type::pointer; + using typename base_type::reference; + using typename base_type::value_type; + + host_allocator_impl() noexcept + : base_type(&hostEnv::instance()->host_allocator()) {} + + template + host_allocator_impl(const host_allocator_impl& rhs) noexcept + : base_type(static_cast&>(rhs)) {} + + template + friend bool operator==(const host_allocator_impl& lhs, + const host_allocator_impl& rhs) noexcept; +}; // class host_allocator + +template +bool operator==(const host_allocator_impl& lhs, + const host_allocator_impl& rhs) noexcept { + return lhs.umpire_allocator() == rhs.umpire_allocator(); +} + +template +bool operator!=(const host_allocator_impl& lhs, + const host_allocator_impl& rhs) noexcept { + return !(lhs == rhs); +} + +} // namespace TiledArray + +#endif // TILEDARRAY_HOST_ALLOCATOR_H___INCLUDED diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h new file mode 100644 index 0000000000..39d60a8931 --- /dev/null +++ b/src/TiledArray/host/env.h @@ -0,0 +1,143 @@ +/* + * This file is a part of TiledArray. + * Copyright (C) 2021 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Chong Peng + * Department of Chemistry, Virginia Tech + * July 23, 2018 + * + */ + +#ifndef TILEDARRAY_HOST_ENV_H__INCLUDED +#define TILEDARRAY_HOST_ENV_H__INCLUDED + +#include + +// for memory management +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace TiledArray { + +namespace detail { + +inline std::pair mpi_local_rank_size(World& world) { + auto host_comm = + world.mpi.comm().Split_type(SafeMPI::Intracomm::SHARED_SPLIT_TYPE, 0); + return std::make_pair(host_comm.Get_rank(), host_comm.Get_size()); +} + +} // namespace detail + +/** + * hostEnv set up global environment + * + * Singleton class + */ + +class hostEnv { + public: + ~hostEnv() = default; + + hostEnv(const hostEnv&) = delete; + hostEnv(hostEnv&&) = delete; + hostEnv& operator=(const hostEnv&) = delete; + hostEnv& operator=(hostEnv&&) = delete; + + /// access the instance, if not initialized will be initialized using default + /// params + static std::unique_ptr& instance() { + if (!instance_accessor()) { + initialize(TiledArray::get_default_world()); + } + return instance_accessor(); + } + + /// initialize the instance using explicit params + static void initialize(World& world, + const std::uint64_t max_memory_size = (1ul << 40), + const std::uint64_t page_size = (1ul << 30)) { + // initialize only when not initialized + if (instance_accessor() == nullptr) { + // uncomment to debug umpire ops + // + // umpire::util::Logger::getActiveLogger()->setLoggingMsgLevel( + // umpire::util::message::Debug); + + // make thread-safe size-limited pool of host memory + + auto& rm = umpire::ResourceManager::getInstance(); + + // turn off Umpire introspection for non-Debug builds +#ifndef NDEBUG + constexpr auto introspect = true; +#else + constexpr auto introspect = false; +#endif + + // start with empty memory, increase each by 1 GB + auto alloc_grain = 1ul << 30; + + // allocate zero memory for device pool, same grain for subsequent allocs + auto host_size_limited_alloc = + rm.makeAllocator( + "size_limited_alloc", rm.getAllocator("HOST"), max_memory_size); + auto host_dynamic_pool = + rm.makeAllocator( + "HostDynamicPool", host_size_limited_alloc, 0, page_size); + auto thread_safe_host_dynamic_pool = + rm.makeAllocator( + "ThreadSafeHostDynamicPool", host_dynamic_pool); + + auto host_env = std::unique_ptr( + new hostEnv(world, thread_safe_host_dynamic_pool)); + instance_accessor() = std::move(host_env); + } + } + + World& world() const { return *world_; } + + umpire::Allocator& host_allocator() { return host_allocator_; } + + protected: + hostEnv(World& world, umpire::Allocator host_alloc) + : world_(&world), host_allocator_(host_alloc) {} + + private: + // the world used to initialize this + World* world_; + + /// allocates from a thread-safe, dynamic, size-limited host memory pool + umpire::Allocator host_allocator_; + + inline static std::unique_ptr& instance_accessor() { + static std::unique_ptr instance_{nullptr}; + return instance_; + } +}; + +} // namespace TiledArray + +#endif // TILEDARRAY_HOST_ENV_H__INCLUDED diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index afe32a038f..bdc37af2cf 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -20,6 +20,8 @@ #ifndef TILEDARRAY_TENSOR_TENSOR_H__INCLUDED #define TILEDARRAY_TENSOR_TENSOR_H__INCLUDED +#include "TiledArray/host/allocator.h" + #include "TiledArray/math/blas.h" #include "TiledArray/math/gemm_helper.h" #include "TiledArray/tensor/complex.h" @@ -28,6 +30,7 @@ #include "TiledArray/tile_interface/permute.h" #include "TiledArray/tile_interface/trace.h" #include "TiledArray/util/logger.h" + namespace TiledArray { // Forward declare Tensor for type traits @@ -627,12 +630,12 @@ class Tensor { /// \tparam Archive The output archive type /// \param[out] ar The output archive template >::type* = nullptr> + typename std::enable_if< + madness::is_output_archive_v>::type* = nullptr> void serialize(Archive& ar) { if (pimpl_) { const std::uint64_t volume = pimpl_->range_.volume(); - ar & volume; + ar& volume; ar& madness::archive::wrap(pimpl_->data_, volume); ar & pimpl_->range_; } else { @@ -646,8 +649,8 @@ class Tensor { /// \tparam Archive The input archive type /// \param[out] ar The input archive template >::type* = nullptr> + typename std::enable_if< + madness::is_input_archive_v>::type* = nullptr> void serialize(Archive& ar) { std::uint64_t n = 0; ar& n; @@ -1872,7 +1875,7 @@ class Tensor { } } } -#else // TA_ENABLE_TILE_OPS_LOGGING +#else // TA_ENABLE_TILE_OPS_LOGGING math::blas::gemm(gemm_helper.left_op(), gemm_helper.right_op(), m, n, k, factor, left.data(), lda, right.data(), ldb, numeric_type(1), pimpl_->data_, n); From 7649a8dfc3df7a10d59c0e53c2bd8904d602d0e0 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 29 Jul 2021 07:45:59 -0400 Subject: [PATCH 02/12] bump Umpire to 5.0.1 --- INSTALL.md | 2 +- cmake/modules/FetchBLT.cmake | 2 +- external/umpire.cmake | 2 +- external/versions.cmake | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index f2dc253661..687b16fb20 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -65,7 +65,7 @@ Compiling BTAS requires the following prerequisites: Optional prerequisites: - [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on CUDA-enabled accelerators. CUDA 11 or later is required. Support for CUDA also requires the following additional prerequisites, both of which will be built and installed automatically if missing: - [cuTT](github.com/ValeevGroup/cutt) -- CUDA transpose library; note that our fork of the [original cuTT repo](github.com/ap-hynninen/cutt) is required to provide thread-safety (tag 0e8685bf82910bc7435835f846e88f1b39f47f09). - - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag f04abd1dd038c84262915a493d8f78576bb80fd0). + - [Umpire](github.com/LLNL/Umpire) -- portable memory manager for heterogeneous platforms (tag 5201a47a35e3844160dcbecd0916f8c96aa7dd07). - [Doxygen](http://www.doxygen.nl/) -- for building documentation (version 1.8.12 or later). - [ScaLAPACK](http://www.netlib.org/scalapack/) -- a distributed-memory linear algebra package. If detected, the following C++ components will also be sought and downloaded, if missing: - [blacspp](https://github.com/wavefunction91/blacspp.git) -- a modern C++ (C++17) wrapper for BLACS (tag 88076f1706be083ead882f6ce0bfc6884a72fc03) diff --git a/cmake/modules/FetchBLT.cmake b/cmake/modules/FetchBLT.cmake index 706c34c327..3ceaa52190 100644 --- a/cmake/modules/FetchBLT.cmake +++ b/cmake/modules/FetchBLT.cmake @@ -1,6 +1,6 @@ FetchContent_Declare( BLT - GIT_REPOSITORY https://github.com/evaleev/blt.git + GIT_REPOSITORY https://github.com/LLNL/blt.git GIT_TAG origin/develop ) FetchContent_MakeAvailable(BLT) diff --git a/external/umpire.cmake b/external/umpire.cmake index 271d374f6a..5505bf5781 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -158,7 +158,7 @@ set_target_properties( TiledArray_UMPIRE PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "$;$;$" + "$;$;$;$" INTERFACE_LINK_LIBRARIES "$;$" ) diff --git a/external/versions.cmake b/external/versions.cmake index bd82b1adf5..ecdf7f2dcd 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -30,7 +30,7 @@ set(TA_TRACKED_BTAS_PREVIOUS_TAG f4c965b778ff7df74f276f7fcf6224ac419e8ee3) set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09) set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3) -set(TA_TRACKED_UMPIRE_TAG f04abd1dd038c84262915a493d8f78576bb80fd0) +set(TA_TRACKED_UMPIRE_TAG 5201a47a35e3844160dcbecd0916f8c96aa7dd07) set(TA_TRACKED_UMPIRE_PREVIOUS_TAG f04abd1dd038c84262915a493d8f78576bb80fd0) #set(TA_TRACKED_BLACSPP_TAG 20cfd414c5b719be1c958f4a2d57abef06df83b6 ) From 81a6f74dbc90b26fd830bbf94cd0294ce1e935dc Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 29 Jul 2021 07:46:33 -0400 Subject: [PATCH 03/12] Umpire: DynamicPool -> QuickPool --- src/TiledArray/external/cuda.h | 4 ++-- src/TiledArray/host/env.h | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/TiledArray/external/cuda.h b/src/TiledArray/external/cuda.h index 2a7d325023..78a210f784 100644 --- a/src/TiledArray/external/cuda.h +++ b/src/TiledArray/external/cuda.h @@ -252,7 +252,7 @@ class cudaEnv { // subsequent allocs will use 1/10 of the total device memory auto alloc_grain = mem_total_free.second / 10; auto um_dynamic_pool = - rm.makeAllocator( + rm.makeAllocator( "UMDynamicPool", rm.getAllocator("UM"), mem_total_free.second, alloc_grain); auto thread_safe_um_dynamic_pool = @@ -265,7 +265,7 @@ class cudaEnv { "size_limited_alloc", rm.getAllocator("DEVICE"), mem_total_free.first); auto dev_dynamic_pool = - rm.makeAllocator( + rm.makeAllocator( "CUDADynamicPool", dev_size_limited_alloc, 0, alloc_grain); auto thread_safe_dev_dynamic_pool = rm.makeAllocator( diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h index 39d60a8931..469a4aa1b4 100644 --- a/src/TiledArray/host/env.h +++ b/src/TiledArray/host/env.h @@ -28,7 +28,7 @@ // for memory management #include -#include +#include #include #include @@ -78,7 +78,7 @@ class hostEnv { /// initialize the instance using explicit params static void initialize(World& world, const std::uint64_t max_memory_size = (1ul << 40), - const std::uint64_t page_size = (1ul << 30)) { + const std::uint64_t page_size = (1ul << 22)) { // initialize only when not initialized if (instance_accessor() == nullptr) { // uncomment to debug umpire ops @@ -97,15 +97,12 @@ class hostEnv { constexpr auto introspect = false; #endif - // start with empty memory, increase each by 1 GB - auto alloc_grain = 1ul << 30; - // allocate zero memory for device pool, same grain for subsequent allocs auto host_size_limited_alloc = rm.makeAllocator( "size_limited_alloc", rm.getAllocator("HOST"), max_memory_size); auto host_dynamic_pool = - rm.makeAllocator( + rm.makeAllocator( "HostDynamicPool", host_size_limited_alloc, 0, page_size); auto thread_safe_host_dynamic_pool = rm.makeAllocator( From 671f2096b09e0bd2b7d9cbaf8b5ccb81104e1055 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 29 Jul 2021 11:56:54 -0400 Subject: [PATCH 04/12] implemented simple profiling for TA::Tensor memory allocations --- CMakeLists.txt | 4 +++ INSTALL.md | 1 + external/versions.cmake | 4 +-- src/TiledArray/config.h.in | 3 ++ src/TiledArray/tensor/tensor.h | 66 ++++++++++++++++++++++++++++++---- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45e8265589..76d4881732 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,6 +132,10 @@ if((ENABLE_GPERFTOOLS OR ENABLE_TCMALLOC_MINIMAL) AND CMAKE_SYSTEM_NAME MATCHES set(ENABLE_LIBUNWIND ON) add_feature_info(Libunwind ENABLE_LIBUNWIND "Libunwind provides stack unwinding") endif() + +option(TA_TENSOR_MEM_PROFILE "Turn on instrumented profiling of TA::Tensor memory use" OFF) +add_feature_info(TENSOR_MEM_PROFILE TA_TENSOR_MEM_PROFILE "instrumented profiling of TA::Tensor memory use") + option(TA_BUILD_UNITTEST "Causes building TiledArray unit tests" ON) option(TA_EXPERT "TiledArray Expert mode: disables automatically downloading or building dependencies" OFF) diff --git a/INSTALL.md b/INSTALL.md index 687b16fb20..b0c7e217e6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -405,6 +405,7 @@ support may be added. * `TA_ENABLE_RANGEV3` -- Set to `ON` to find or fetch the Range-V3 library and enable additional tests of TA components with constructs anticipated to be supported in the future. [Default=OFF]. * `TA_SIGNED_1INDEX_TYPE` -- Set to `OFF` to use unsigned 1-index coordinate type (default for TiledArray 1.0.0-alpha.2 and older). The default is `ON`, which enables the use of negative indices in coordinates. * `TA_MAX_SOO_RANK_METADATA` -- Specifies the maximum rank for which to use Small Object Optimization (hence, avoid the use of the heap) for metadata. The default is `8`. +* `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile memory allocations in TA::Tensor. # Build TiledArray diff --git a/external/versions.cmake b/external/versions.cmake index ecdf7f2dcd..a36edf7803 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH b9e98a200d2455f06db9c661c5610496) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 6694b3adc9204dc86aba9911444aa6737171c9e3) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG f47f962213be0b1e149f5b56826992f27278128e) +set(TA_TRACKED_MADNESS_TAG 69f4bb3bb5e61d0312bb1a187e38f9985d0be5cc) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 6694b3adc9204dc86aba9911444aa6737171c9e3) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index f038919ea2..86fef14166 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -78,6 +78,9 @@ #cmakedefine TILEDARRAY_HAS_CUDA @TILEDARRAY_HAS_CUDA@ #cmakedefine TILEDARRAY_CHECK_CUDA_ERROR @TILEDARRAY_CHECK_CUDA_ERROR@ +/* Is TA::Tensor memory profiling enabled? */ +#cmakedefine TA_TENSOR_MEM_PROFILE 1 + /* Use preprocessor to check if BTAS is available */ #ifndef TILEDARRAY_HAS_BTAS #ifdef __has_include diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index bdc37af2cf..6846ebb5bc 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -33,12 +33,15 @@ namespace TiledArray { -// Forward declare Tensor for type traits -template -class Tensor; - namespace detail { +#ifdef TA_TENSOR_MEM_PROFILE +inline static std::mutex + ta_tensor_mem_profile_mtx; // protects the following statics +inline static std::uint64_t nbytes_allocated = 0; +inline static std::uint64_t max_nbytes_allocated = 0; +#endif // TA_TENSOR_MEM_PROFILE + /// Signals that we can take the trace of a Tensor (for numeric \c T) template struct TraceIsDefined, enable_if_numeric_t> : std::true_type {}; @@ -93,6 +96,46 @@ class Tensor { /// This tensor is used as an evaluated intermediate for other tensors. class Impl : public allocator_type { +#ifdef TA_TENSOR_MEM_PROFILE + enum class MemOp{Alloc, Dealloc}; + void alloc_record(std::uint64_t n, MemOp action) { + const double to_MiB = + 1 / (1024.0 * 1024.0); /* Convert from bytes to MiB */ + const auto nbytes = n * sizeof(value_type); + { + std::scoped_lock lock(detail::ta_tensor_mem_profile_mtx); + if (action == MemOp::Alloc) { + detail::nbytes_allocated += nbytes; + detail::max_nbytes_allocated = + std::max(detail::nbytes_allocated, detail::max_nbytes_allocated); + } else + detail::nbytes_allocated -= nbytes; + } + char buf[1024]; + auto value_type_str = []() { + if constexpr (std::is_same_v) + return "double"; + else if constexpr (std::is_same_v) + return "float"; + else if constexpr (std::is_same_v>) + return "zdouble"; + else if constexpr (std::is_same_v>) + return "zfloat"; + else + return ""; + }; + std::snprintf( + buf, 1023, + "TA::Tensor<%s>: %sallocated %lf MiB [wm = %lf MiB hwm = %lf MiB]\n", + value_type_str(), (action == MemOp::Dealloc ? "de" : " "), + nbytes * to_MiB, detail::nbytes_allocated * to_MiB, + detail::max_nbytes_allocated * to_MiB); + auto& os = madness::print_meminfo_ostream(); + os << buf; + os.flush(); + } +#endif + public: /// Default constructor @@ -104,21 +147,30 @@ class Tensor { /// \param range The N-dimensional range for this tensor explicit Impl(const range_type& range) : allocator_type(), range_(range), data_(NULL) { - data_ = allocator_type::allocate(range.volume()); + data_ = allocator_type::allocate(range_.volume()); +#ifdef TA_TENSOR_MEM_PROFILE + alloc_record(range_.volume(), MemOp::Alloc); +#endif } /// Construct with rvalue range /// \param range The N-dimensional range for this tensor explicit Impl(range_type&& range) - : allocator_type(), range_(range), data_(NULL) { - data_ = allocator_type::allocate(range.volume()); + : allocator_type(), range_(std::move(range)), data_(NULL) { + data_ = allocator_type::allocate(range_.volume()); +#ifdef TA_TENSOR_MEM_PROFILE + alloc_record(range_.volume(), MemOp::Alloc); +#endif } ~Impl() { math::destroy_vector(range_.volume(), data_); allocator_type::deallocate(data_, range_.volume()); data_ = NULL; +#ifdef TA_TENSOR_MEM_PROFILE + alloc_record(range_.volume(), MemOp::Dealloc); +#endif } range_type range_; ///< Tensor size info From 1bec6a0306573ac69d2e1a856dbaa5843620328a Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 30 Jul 2021 08:46:17 -0400 Subject: [PATCH 05/12] revert to Eigen allocator, host allocator is slower than just using tcmalloc --- src/TiledArray/fwd.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 20245d9d1f..6f2cd33701 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -25,14 +25,16 @@ #include #include -// #include // fwddecl for std::allocator +// uncomment to import fwddecl for std::allocator +// #include -namespace Eigen { // fwd define Eigen's aligned allocator for - // TiledArray::Tensor +// fwddecl Eigen::aligned_allocator +namespace Eigen { template class aligned_allocator; } // namespace Eigen +// fwddecl host_allocator namespace TiledArray { template class host_allocator_impl; @@ -62,9 +64,8 @@ class DensePolicy; class SparsePolicy; // TiledArray Tensors -template /* or Eigen::aligned_allocator or std::allocator */> +// can also use host_allocator and std::allocator for A +template > class Tensor; typedef Tensor TensorD; From c91864e75f46dad068f1ea9cc6993cb2d9dc55d7 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 30 Jul 2021 13:32:10 -0400 Subject: [PATCH 06/12] bump MAD tag for fixup --- INSTALL.md | 2 +- external/versions.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index b0c7e217e6..f532e84d9b 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Range: header-only, *only used for unit testing* - [BTAS](https://github.com/ValeevGroup/BTAS), tag d7794799e4510cf66844081dd8f1f5b648112d33 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. -- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 7ce06234c23aa8e0ab3d9e9b87eff9cd85390d80 . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 12bd24c6cb984a639be863fc0e1364226713f7ff . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. diff --git a/external/versions.cmake b/external/versions.cmake index a36edf7803..f4451bd0c4 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,7 +19,7 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH b9e98a200d2455f06db9c661c5610496) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 69f4bb3bb5e61d0312bb1a187e38f9985d0be5cc) +set(TA_TRACKED_MADNESS_TAG 12bd24c6cb984a639be863fc0e1364226713f7ff) set(TA_TRACKED_MADNESS_PREVIOUS_TAG 6694b3adc9204dc86aba9911444aa6737171c9e3) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) From 9f70ce85febd3c6b7cf16e1ce87be24205e67291 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Aug 2021 08:56:18 -0400 Subject: [PATCH 07/12] dox++ --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76d4881732..7ec8df6133 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -268,7 +268,7 @@ check_type_size("long double" TILEDARRAY_HAS_LONG_DOUBLE LANGUAGE CXX) check_type_size("long long" TILEDARRAY_HAS_LONG_LONG LANGUAGE CXX) # TA_ASSERT -set (TA_ASSERT_POLICY TA_ASSERT_THROW CACHE STRING "") +set (TA_ASSERT_POLICY TA_ASSERT_THROW CACHE STRING "Controls the behavior of TA_ASSERT; TA_ASSERT_THROW causes TA_ASSERT to throw, TA_ASSERT_ABORT causes TA_ASSERT to abort, TA_ASSERT_IGNORE makes TA_ASSERT a no-op") set_property( CACHE TA_ASSERT_POLICY PROPERTY STRINGS TA_ASSERT_THROW TA_ASSERT_ABORT TA_ASSERT_IGNORE) From 679b822bd451f50ae8c23984fe57e83dd1fc6c19 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Tue, 3 Aug 2021 08:58:17 -0400 Subject: [PATCH 08/12] when using MPI must abort via MPI_Abort --- src/TiledArray/tiledarray.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/TiledArray/tiledarray.cpp b/src/TiledArray/tiledarray.cpp index aa56ea2fec..29b60a61d6 100644 --- a/src/TiledArray/tiledarray.cpp +++ b/src/TiledArray/tiledarray.cpp @@ -2,6 +2,8 @@ #include #include +#include + #ifdef TILEDARRAY_HAS_CUDA #include #include @@ -78,9 +80,8 @@ TiledArray::World& TiledArray::initialize(int& argc, char**& argv, if (!initialized()) { if (!madness::initialized()) { initialized_madworld_accessor() = true; - } - else { // if MADWorld initialized, we must assume that comm is its default - // World. + } else { // if MADWorld initialized, we must assume that comm is its + // default World. if (madness::World::is_default(comm)) throw Exception( "MADWorld initialized before TiledArray::initialize(argc, argv, " @@ -120,16 +121,13 @@ void TiledArray::finalize() { finalized_accessor() = true; } -void TiledArray::ta_abort() { - std::abort(); -} +void TiledArray::ta_abort() { SafeMPI::COMM_WORLD.Abort(); } -void TiledArray::ta_abort(const std::string &m) { +void TiledArray::ta_abort(const std::string& m) { std::cerr << m << std::endl; ta_abort(); } - void TiledArray::taskq_wait_busy() { madness::threadpool_wait_policy(madness::WaitPolicy::Busy); } From e41052f4fbc250ab3887366090bb2f6a4b881c3d Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Aug 2021 09:21:34 -0400 Subject: [PATCH 09/12] revert Umpire to use the pinned version of BLT instead of evaleev/BLT reverts 42e9345a719f863f99a70e968a0b210215759005 --- cmake/modules/FetchBLT.cmake | 9 --------- external/umpire.cmake | 4 ---- 2 files changed, 13 deletions(-) delete mode 100644 cmake/modules/FetchBLT.cmake diff --git a/cmake/modules/FetchBLT.cmake b/cmake/modules/FetchBLT.cmake deleted file mode 100644 index 3ceaa52190..0000000000 --- a/cmake/modules/FetchBLT.cmake +++ /dev/null @@ -1,9 +0,0 @@ -FetchContent_Declare( - BLT - GIT_REPOSITORY https://github.com/LLNL/blt.git - GIT_TAG origin/develop -) -FetchContent_MakeAvailable(BLT) -FetchContent_GetProperties(BLT - SOURCE_DIR BLT_SOURCE_DIR -) diff --git a/external/umpire.cmake b/external/umpire.cmake index 5505bf5781..65e71f5317 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -42,9 +42,6 @@ else() message("** Will clone Umpire from ${UMPIRE_URL}") - ## use patched BLT - include(FetchBLT) - if (TA_ASSERT_POLICY EQUAL TA_ASSERT_IGNORE) set(enable_umpire_asserts OFF) else() @@ -71,7 +68,6 @@ else() -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} -DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS} -DCMAKE_AR=${CMAKE_AR} - -DBLT_SOURCE_DIR=${BLT_SOURCE_DIR} -DBLT_CXX_STD=c++${CMAKE_CXX_STANDARD} -DENABLE_BENCHMARKS=OFF -DENABLE_OPENMP=OFF From 7c6e35403a3b3b7070f8982ec4ac3b03af35ee33 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Aug 2021 09:22:56 -0400 Subject: [PATCH 10/12] pass CMAKE_CUDA_ARCHITECTURES to cuTT and Umpire, if given --- external/cutt.cmake | 3 +++ external/umpire.cmake | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/external/cutt.cmake b/external/cutt.cmake index 7dc0d6e83d..e0fbf00c5d 100644 --- a/external/cutt.cmake +++ b/external/cutt.cmake @@ -72,6 +72,9 @@ else() -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} ) + if (DEFINED CMAKE_CUDA_ARCHITECTURES) + list(APPEND CUTT_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) + endif(DEFINED CMAKE_CUDA_ARCHITECTURES) if (CMAKE_TOOLCHAIN_FILE) set(CUTT_CMAKE_ARGS "${CUTT_CMAKE_ARGS}" "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") diff --git a/external/umpire.cmake b/external/umpire.cmake index 65e71f5317..964f9f65b7 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -85,6 +85,9 @@ else() -DCMAKE_CUDA_HOST_COMPILER=${CMAKE_CUDA_HOST_COMPILER} -DCUDA_TOOLKIT_ROOT_DIR=${CUDAToolkit_ROOT} ) + if (DEFINED CMAKE_CUDA_ARCHITECTURES) + list(APPEND UMPIRE_CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}) + endif(DEFINED CMAKE_CUDA_ARCHITECTURES) endif(ENABLE_CUDA) if (CMAKE_TOOLCHAIN_FILE) set(UMPIRE_CMAKE_ARGS "${UMPIRE_CMAKE_ARGS}" @@ -110,7 +113,6 @@ else() DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} GIT_REPOSITORY ${UMPIRE_URL} GIT_TAG ${UMPIRE_TAG} - GIT_SUBMODULES "" # N.B. do not initialize modules! #--Configure step------------- SOURCE_DIR ${EXTERNAL_SOURCE_DIR} LIST_SEPARATOR :: From ea65d833964609ff8970bfd8d9bc651241ac231f Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Aug 2021 09:23:27 -0400 Subject: [PATCH 11/12] compilation fixups --- src/TiledArray/cuda/um_allocator.h | 12 ++++++++---- src/TiledArray/host/env.h | 10 ---------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/TiledArray/cuda/um_allocator.h b/src/TiledArray/cuda/um_allocator.h index 3bb40126a4..94dfb0de82 100644 --- a/src/TiledArray/cuda/um_allocator.h +++ b/src/TiledArray/cuda/um_allocator.h @@ -39,17 +39,21 @@ namespace TiledArray { /// CUDA UM allocator, based on boilerplate by Howard Hinnant /// (https://howardhinnant.github.io/allocator_boilerplate.html) template -class cuda_um_allocator_impl : public umpire_allocator { +class cuda_um_allocator_impl : public umpire_allocator_impl { public: - using base_type = umpire_allocator; - using base_type; + using base_type = umpire_allocator_impl; + using typename base_type::const_pointer; + using typename base_type::const_reference; + using typename base_type::pointer; + using typename base_type::reference; + using typename base_type::value_type; cuda_um_allocator_impl() noexcept : base_type(&cudaEnv::instance()->um_dynamic_pool()) {} template cuda_um_allocator_impl(const cuda_um_allocator_impl& rhs) noexcept - : base_type(static_cast&>(rhs)) {} + : base_type(static_cast&>(rhs)) {} template friend bool operator==(const cuda_um_allocator_impl& lhs, diff --git a/src/TiledArray/host/env.h b/src/TiledArray/host/env.h index 469a4aa1b4..2ae0bf6930 100644 --- a/src/TiledArray/host/env.h +++ b/src/TiledArray/host/env.h @@ -41,16 +41,6 @@ namespace TiledArray { -namespace detail { - -inline std::pair mpi_local_rank_size(World& world) { - auto host_comm = - world.mpi.comm().Split_type(SafeMPI::Intracomm::SHARED_SPLIT_TYPE, 0); - return std::make_pair(host_comm.Get_rank(), host_comm.Get_size()); -} - -} // namespace detail - /** * hostEnv set up global environment * From 51061e906868bdc059c5cf79317f99d7f40efff8 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Wed, 4 Aug 2021 09:35:34 -0400 Subject: [PATCH 12/12] deps built via ExternalProject_add use default dir structure of FetchContent (CMAKE_BINARY_DIR/_deps, etc.) --- external/cutt.cmake | 10 +++++----- external/eigen.cmake | 6 ++---- external/madness.cmake | 16 ++++++++-------- external/umpire.cmake | 10 +++++----- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/external/cutt.cmake b/external/cutt.cmake index e0fbf00c5d..f945b54b15 100644 --- a/external/cutt.cmake +++ b/external/cutt.cmake @@ -23,10 +23,10 @@ else() enable_language(C) # set source and build path for cuTT in the TiledArray project - set(EXTERNAL_SOURCE_DIR ${PROJECT_BINARY_DIR}/external/source/cutt) + set(EXTERNAL_SOURCE_DIR ${CMAKE_BINARY_DIR}/_deps/cutt-src) # cutt only supports in source build - set(EXTERNAL_BUILD_DIR ${PROJECT_BINARY_DIR}/external/build/cutt) - set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/external/cutt) + set(EXTERNAL_BUILD_DIR ${CMAKE_BINARY_DIR}/_deps/cutt-build) + set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}) if (NOT CUTT_URL) set(CUTT_URL https://github.com/ValeevGroup/cutt.git) @@ -92,8 +92,8 @@ else() ExternalProject_Add(cutt PREFIX ${CMAKE_INSTALL_PREFIX} - STAMP_DIR ${PROJECT_BINARY_DIR}/external/cutt-stamp - TMP_DIR ${PROJECT_BINARY_DIR}/external/tmp + STAMP_DIR ${CMAKE_BINARY_DIR}/_deps/cutt-ep-artifacts + TMP_DIR ${CMAKE_BINARY_DIR}/_deps/cutt-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable #--Download step-------------- DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} GIT_REPOSITORY ${CUTT_URL} diff --git a/external/eigen.cmake b/external/eigen.cmake index 1b034177b7..361a8fe8ae 100644 --- a/external/eigen.cmake +++ b/external/eigen.cmake @@ -93,15 +93,13 @@ else() include(ExternalProject) # Set source and build path for Eigen3 in the TiledArray Project - set(EXTERNAL_SOURCE_DIR ${PROJECT_BINARY_DIR}/external/source/eigen) - set(EXTERNAL_BUILD_DIR ${PROJECT_BINARY_DIR}/external/build/eigen) + set(EXTERNAL_SOURCE_DIR ${CMAKE_BINARY_DIR}/_deps/eigen-src) + set(EXTERNAL_BUILD_DIR ${CMAKE_BINARY_DIR}/_deps/eigen-build) message("** Will build Eigen from ${EIGEN3_URL}") ExternalProject_Add(eigen3 PREFIX ${CMAKE_INSTALL_PREFIX} - STAMP_DIR ${EXTERNAL_BUILD_DIR}/stamp - TMP_DIR ${EXTERNAL_BUILD_DIR}/tmp #--Download step-------------- DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} URL ${EIGEN3_URL} diff --git a/external/madness.cmake b/external/madness.cmake index 0c9f2fa482..9ab766f7e6 100644 --- a/external/madness.cmake +++ b/external/madness.cmake @@ -129,9 +129,9 @@ else() # Create a cache entry for MADNESS build variables. # Note: This will not overwrite user specified values. - set(MADNESS_SOURCE_DIR "${PROJECT_BINARY_DIR}/external/madness-src" CACHE PATH + set(MADNESS_SOURCE_DIR "${CMAKE_BINARY_DIR}/_deps/madness-src" CACHE PATH "Path to the MADNESS source directory") - set(MADNESS_BINARY_DIR "${PROJECT_BINARY_DIR}/external/madness-build" CACHE PATH + set(MADNESS_BINARY_DIR "${CMAKE_BINARY_DIR}/_deps/madness-build" CACHE PATH "Path to the MADNESS build directory") set(MADNESS_URL "https://github.com/m-a-d-n-e-s-s/madness.git" CACHE STRING "Path to the MADNESS repository") @@ -152,16 +152,16 @@ else() # If the MADNESS source directory is the default location and does not exist, # MADNESS will be downloaded from git. message(STATUS "Checking MADNESS source directory: ${MADNESS_SOURCE_DIR}") - if("${MADNESS_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}/external/madness-src") + if("${MADNESS_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}/_deps/madness-src") - # Create the external source directory - if(NOT EXISTS ${PROJECT_BINARY_DIR}/external) + # Create the source directory + if(NOT EXISTS ${CMAKE_BINARY_DIR}/_deps) set(error_code 1) execute_process( - COMMAND "${CMAKE_COMMAND}" -E make_directory "${PROJECT_BINARY_DIR}/external" + COMMAND "${CMAKE_COMMAND}" -E make_directory "${CMAKE_BINARY_DIR}/_deps" RESULT_VARIABLE error_code) if(error_code) - message(FATAL_ERROR "Failed to create directory \"${PROJECT_BINARY_DIR}/external\"") + message(FATAL_ERROR "Failed to create directory \"${CMAKE_BINARY_DIR}/_deps\"") endif() endif() @@ -173,7 +173,7 @@ else() while(error_code AND number_of_tries LESS 3) execute_process( COMMAND ${GIT_EXECUTABLE} clone ${MADNESS_URL} madness-src - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/external + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/_deps RESULT_VARIABLE error_code) math(EXPR number_of_tries "${number_of_tries} + 1") endwhile() diff --git a/external/umpire.cmake b/external/umpire.cmake index 964f9f65b7..d90100cf4d 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -29,9 +29,9 @@ else() enable_language(C) # set source and build path for Umpire in the TiledArray project - set(EXTERNAL_SOURCE_DIR ${PROJECT_BINARY_DIR}/external/source/Umpire) - set(EXTERNAL_BUILD_DIR ${PROJECT_BINARY_DIR}/external/build/Umpire) - set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/external/Umpire) + set(EXTERNAL_SOURCE_DIR ${CMAKE_BINARY_DIR}/_deps/umpire-src) + set(EXTERNAL_BUILD_DIR ${CMAKE_BINARY_DIR}/_deps/umpire-build) + set(EXTERNAL_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}) if (NOT UMPIRE_URL) set(UMPIRE_URL https://github.com/LLNL/Umpire.git) @@ -107,8 +107,8 @@ else() ExternalProject_Add(Umpire PREFIX ${CMAKE_INSTALL_PREFIX} - STAMP_DIR ${PROJECT_BINARY_DIR}/external/Umpire-stamp - TMP_DIR ${PROJECT_BINARY_DIR}/external/tmp + STAMP_DIR ${CMAKE_BINARY_DIR}/_deps/umpire-ep-artifacts + TMP_DIR ${CMAKE_BINARY_DIR}/_deps/umpire-ep-artifacts # needed in case CMAKE_INSTALL_PREFIX is not writable #--Download step-------------- DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} GIT_REPOSITORY ${UMPIRE_URL}