Skip to content

Commit f71b415

Browse files
committed
Add benchmark files from #162.
1 parent 767e9db commit f71b415

File tree

8 files changed

+564
-2
lines changed

8 files changed

+564
-2
lines changed

CMakeLists.txt

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,9 @@ if(CMAKE_COMPILER_IS_GNUCXX)
5454
endif(CMAKE_CXX11_ABI)
5555
endif(CMAKE_COMPILER_IS_GNUCXX)
5656

57-
option(BUILD_TESTS "Configure CMake to build tests"
58-
ON)
57+
option(BUILD_TESTS "Configure CMake to build tests" ON)
58+
option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF)
59+
5960

6061
###################################################################################################
6162
# - cnmem ---------------------------------------------------------------------------------
@@ -89,6 +90,24 @@ if(BUILD_TESTS)
8990
endif(GTEST_FOUND)
9091
endif(BUILD_TESTS)
9192

93+
###################################################################################################
94+
# - add google benchmark --------------------------------------------------------------------------
95+
96+
if(BUILD_BENCHMARKS)

    # Locate Google Benchmark. The included module is presumably what sets
    # GBENCH_FOUND, GBENCH_ROOT and GBENCH_INCLUDE_DIR -- confirm against
    # cmake/ConfigureGoogleBenchmark.
    include(ConfigureGoogleBenchmark)

    if(GBENCH_FOUND)
        message(STATUS "Google C++ Benchmarking Framework (Google Benchmark) found in ${GBENCH_ROOT}")
        include_directories(${GBENCH_INCLUDE_DIR})
        add_subdirectory(${CMAKE_SOURCE_DIR}/benchmarks)
    else()
        # Only the benchmarks are skipped here; the unit tests are governed by BUILD_TESTS.
        message(AUTHOR_WARNING "Google C++ Benchmarking Framework (Google Benchmark) not found: benchmarks are disabled.")
    endif(GBENCH_FOUND)

endif(BUILD_BENCHMARKS)
109+
110+
92111
###################################################################################################
93112
# - include paths ---------------------------------------------------------------------------------
94113

benchmarks/CMakeLists.txt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
2+
3+
project(RMM_BENCHS LANGUAGES C CXX CUDA)
4+
5+
###################################################################################################
6+
# - compiler function -----------------------------------------------------------------------------
7+
8+
# Defines one benchmark executable.
#   CMAKE_BENCH_NAME - name of the target to create
#   CMAKE_BENCH_SRC  - source file(s) of the benchmark itself
# synchronization/synchronization.cpp is compiled into every benchmark target.
# The resulting binary is written to <build dir>/gbenchmarks.
function(ConfigureBench CMAKE_BENCH_NAME CMAKE_BENCH_SRC)
    add_executable(${CMAKE_BENCH_NAME}
                   ${CMAKE_BENCH_SRC}
                   "${CMAKE_CURRENT_SOURCE_DIR}/synchronization/synchronization.cpp")

    target_link_libraries(${CMAKE_BENCH_NAME} benchmark benchmark_main pthread rmm)

    set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES
                          POSITION_INDEPENDENT_CODE ON
                          RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gbenchmarks")
endfunction(ConfigureBench)
17+
18+
###################################################################################################
19+
# - include paths ---------------------------------------------------------------------------------
20+
21+
# Header search path for every benchmark target, in lookup order:
# CUDA toolkit, generated and source-tree headers, GoogleTest,
# Google Benchmark, and finally this benchmarks directory.
include_directories(
    "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
    "${CMAKE_BINARY_DIR}/include"
    "${CMAKE_SOURCE_DIR}/include"
    "${CMAKE_SOURCE_DIR}"
    "${CMAKE_SOURCE_DIR}/src"
    "${GTEST_INCLUDE_DIR}"
    "${GBENCH_INCLUDE_DIR}"
    "${CMAKE_CURRENT_SOURCE_DIR}"
)
29+
30+
###################################################################################################
31+
# - library paths ---------------------------------------------------------------------------------
32+
33+
# Library search path for every benchmark target.
# CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable
# containing the link directories for nvcc.
link_directories(
    "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}"
    "${CMAKE_BINARY_DIR}/lib"
    "${CMAKE_BINARY_DIR}"
    "${GTEST_LIBRARY_DIR}"
    "${GBENCH_LIBRARY_DIR}"
)
38+
39+
###################################################################################################
40+
### test sources ##################################################################################
41+
###################################################################################################
42+
43+
###################################################################################################
44+
# - test benchmark --------------------------------------------------------------------------------
45+
46+
# Sources of the TEST_BENCH executable.
set(TEST_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/test/test_benchmark.cpp")

# Register the TEST_BENCH target.
ConfigureBench(TEST_BENCH "${TEST_BENCH_SRC}")
50+
51+
###################################################################################################
52+
# - random allocations benchmark --------------------------------------------------------------------------------
53+
54+
# Sources of the RANDOM_ALLOCATIONS_BENCH executable.
set(RANDOM_ALLOCATIONS_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/random_allocations/random_allocations.cpp")

# Register the RANDOM_ALLOCATIONS_BENCH target.
ConfigureBench(RANDOM_ALLOCATIONS_BENCH "${RANDOM_ALLOCATIONS_BENCH_SRC}")
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/*
2+
* Copyright (c) 2019, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include <rmm/mr/device/cnmem_memory_resource.hpp>
17+
#include <rmm/mr/device/cuda_memory_resource.hpp>
18+
#include <rmm/mr/device/default_memory_resource.hpp>
19+
#include <rmm/mr/device/device_memory_resource.hpp>
20+
#include <rmm/mr/device/managed_memory_resource.hpp>
21+
#include <rmm/mr/device/sub_memory_resource.hpp>
22+
23+
#include <benchmark/benchmark.h>
24+
25+
#include <random>
26+
27+
#define VERBOSE 0
28+
29+
namespace {
30+
31+
constexpr std::size_t size_mb{1 << 20};
32+
33+
// Bookkeeping record for one allocation: the pointer that was returned and
// the number of bytes that were requested for it.
struct allocation {
  void* p{nullptr};
  std::size_t size{0};

  allocation() = default;
  allocation(void* _p, std::size_t _size) : p{_p}, size{_size} {}
};

using allocation_vector = std::vector<allocation>;

// Removes the entry at `index` from `allocs` and returns a copy of it.
// The vacated slot is filled with the last element, so the order of the
// remaining entries is not preserved. `index` must be a valid position.
allocation remove_at(allocation_vector& allocs, std::size_t index) {
  assert(index < allocs.size());

  allocation taken = allocs[index];

  // When the removed entry is not already the last one, move the last entry
  // into its slot; the tail element is dropped below either way.
  if (index + 1 < allocs.size()) { allocs[index] = allocs.back(); }
  allocs.pop_back();

  return taken;
}
53+
54+
template <typename SizeDistribution>
55+
void random_allocation_free(rmm::mr::device_memory_resource& mr,
56+
SizeDistribution size_distribution,
57+
size_t num_allocations,
58+
size_t max_usage, // in MiB
59+
cudaStream_t stream = 0)
60+
{
61+
std::default_random_engine generator;
62+
63+
max_usage *= size_mb; // convert to bytes
64+
65+
constexpr int allocation_probability = 73; // percent
66+
std::uniform_int_distribution<int> op_distribution(0, 99);
67+
std::uniform_int_distribution<int> index_distribution(0, num_allocations-1);
68+
69+
int active_allocations{0};
70+
int allocation_count{0};
71+
72+
allocation_vector allocations{};
73+
size_t allocation_size{0};
74+
75+
for (int i = 0; i < num_allocations * 2; ++i) {
76+
bool do_alloc = true;
77+
size_t size = static_cast<size_t>(size_distribution(generator));
78+
79+
if (active_allocations > 0) {
80+
int chance = op_distribution(generator);
81+
do_alloc = (chance < allocation_probability) &&
82+
(allocation_count < num_allocations) &&
83+
(allocation_size + size < max_usage);
84+
}
85+
86+
void* ptr = nullptr;
87+
if (do_alloc) { // try to allocate
88+
try {
89+
ptr = mr.allocate(size, stream);
90+
} catch(std::bad_alloc) {
91+
do_alloc = false;
92+
}
93+
}
94+
95+
if (do_alloc) { // alloc succeeded
96+
allocations.emplace_back(ptr, size);
97+
active_allocations++;
98+
allocation_count++;
99+
allocation_size += size;
100+
101+
#if VERBOSE
102+
std::cout << active_allocations << " | " << allocation_count << " Allocating: " << size
103+
<< " | total: " << allocation_size << "\n";
104+
#endif
105+
}
106+
else { // dealloc, or alloc failed
107+
if (active_allocations > 0) {
108+
size_t index = index_distribution(generator) % active_allocations;
109+
active_allocations--;
110+
allocation to_free = remove_at(allocations, index);
111+
mr.deallocate(to_free.p, to_free.size, stream);
112+
allocation_size -= to_free.size;
113+
114+
#if VERBOSE
115+
std::cout << active_allocations << " | " << allocation_count << " Deallocating: "
116+
<< to_free.size << " at " << index << " | total: " << allocation_size << "\n";
117+
#endif
118+
}
119+
}
120+
}
121+
122+
assert(active_allocations == 0);
123+
assert(allocations.size() == 0);
124+
}
125+
} // namespace
126+
127+
// Runs random_allocation_free with allocation sizes drawn uniformly from
// [1, max_allocation_size MiB] bytes. The remaining arguments are forwarded
// unchanged.
void uniform_random_allocations(rmm::mr::device_memory_resource& mr,
                                size_t num_allocations,
                                size_t max_allocation_size,  // in MiB
                                size_t max_usage,
                                cudaStream_t stream = 0) {
  std::size_t const largest_request_bytes = max_allocation_size * size_mb;
  std::uniform_int_distribution<std::size_t> size_distribution(1, largest_request_bytes);

  random_allocation_free(mr, size_distribution, num_allocations, max_usage, stream);
}
135+
136+
// TODO figure out how to map a normal distribution to integers between 1 and max_allocation_size
137+
/*void normal_random_allocations(rmm::mr::device_memory_resource& mr,
138+
size_t num_allocations = 1000,
139+
size_t mean_allocation_size = 500, // in MiB
140+
size_t stddev_allocation_size = 500, // in MiB
141+
size_t max_usage = 8 << 20,
142+
cudaStream_t stream) {
143+
std::normal_distribution<std::size_t> size_distribution(, max_allocation_size * size_mb);
144+
}*/
145+
146+
// Workload parameters shared by the benchmarks in this file.
constexpr size_t num_allocations{100000};  // allocation budget handed to each run
constexpr size_t max_size{2};              // largest single allocation, in MiB
constexpr size_t max_usage{16000};         // cap on simultaneously live memory, in MiB
149+
150+
// Benchmark body: one uniform_random_allocations run per benchmark iteration
// against a cuda_memory_resource. Any std::exception ends the loop and is
// reported on stdout instead of propagating.
static void BM_RandomAllocationsCUDA(benchmark::State& state) {
  rmm::mr::cuda_memory_resource mr;

  try {
    for (auto _ : state) {
      uniform_random_allocations(mr, num_allocations, max_size, max_usage);
    }
  } catch (const std::exception& err) {
    std::cout << "Error: " << err.what() << "\n";
  }
}
160+
//BENCHMARK(BM_RandomAllocationsCUDA)->Unit(benchmark::kMillisecond);
161+
162+
template <typename State>
163+
static void BM_RandomAllocationsSub(State& state) {
164+
rmm::mr::sub_memory_resource mr;
165+
166+
try {
167+
for (auto _ : state)
168+
uniform_random_allocations(mr, num_allocations, max_size, max_usage);
169+
} catch (std::exception const& e) {
170+
std::cout << "Error: " << e.what() << "\n";
171+
}
172+
}
173+
BENCHMARK(BM_RandomAllocationsSub)->Unit(benchmark::kMillisecond);
174+
175+
template <typename State>
176+
static void BM_RandomAllocationsCnmem(State& state) {
177+
rmm::mr::cnmem_memory_resource mr;
178+
179+
try {
180+
for (auto _ : state)
181+
uniform_random_allocations(mr, num_allocations, max_size, max_usage);
182+
} catch (std::exception const& e) {
183+
std::cout << "Error: " << e.what() << "\n";
184+
}
185+
}
186+
BENCHMARK(BM_RandomAllocationsCnmem)->Unit(benchmark::kMillisecond);
187+
188+
/*int main(void) {
189+
std::vector<int> state(1);
190+
BM_RandomAllocationsSub(state);
191+
return 0;
192+
}*/
193+
194+
195+
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright (c) 2019, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "synchronization.hpp"
18+
#include "rmm/rmm.h"
19+
20+
// Evaluates `call` exactly once and throws std::runtime_error("CUDA error")
// when the result is not cudaSuccess.
// The expansion deliberately has no trailing semicolon: the caller's own `;`
// completes the do/while, so the macro behaves as a single statement and can
// be used in an unbraced `if (...) CUDA_TRY(x); else ...`.
#define CUDA_TRY(call)                        \
  do {                                        \
    cudaError_t const status = (call);        \
    if (cudaSuccess != status) {              \
      throw std::runtime_error("CUDA error"); \
    }                                         \
  } while (0)
27+
28+
// Evaluates `call` exactly once and throws std::runtime_error("RMM error")
// when the result is not RMM_SUCCESS.
// The expansion deliberately has no trailing semicolon: the caller's own `;`
// completes the do/while, so the macro behaves as a single statement and can
// be used in an unbraced `if (...) RMM_TRY(x); else ...`.
#define RMM_TRY(call)                        \
  do {                                       \
    rmmError_t const status = (call);        \
    if (RMM_SUCCESS != status) {             \
      throw std::runtime_error("RMM error"); \
    }                                        \
  } while (0)
35+
36+
// Starts a timed region: optionally overwrites an L2-cache-sized scratch
// buffer on `stream` (intended to flush L2), then creates the start/stop
// events and records `start` on `stream`.
// Throws std::runtime_error if any CUDA or RMM call fails.
// NOTE(review): `start`, `stop`, `p_state` and the `stream` member are
// presumably declared in synchronization.hpp -- not visible here.
cuda_event_timer::cuda_event_timer(benchmark::State& state,
                                   bool flush_l2_cache,
                                   cudaStream_t stream)
  : p_state(&state), stream(stream) {
  if (flush_l2_cache) {
    int device_id = 0;
    CUDA_TRY(cudaGetDevice(&device_id));

    int l2_size_bytes = 0;
    CUDA_TRY(cudaDeviceGetAttribute(&l2_size_bytes, cudaDevAttrL2CacheSize, device_id));

    // A device that reports no L2 cache has nothing to flush.
    if (l2_size_bytes > 0) {
      const int fill_value = 0;
      int* scratch         = nullptr;
      RMM_TRY(RMM_ALLOC(&scratch, l2_size_bytes, stream));
      CUDA_TRY(cudaMemsetAsync(scratch, fill_value, l2_size_bytes, stream));
      RMM_TRY(RMM_FREE(scratch, stream));
    }
  }

  CUDA_TRY(cudaEventCreate(&start));
  CUDA_TRY(cudaEventCreate(&stop));
  CUDA_TRY(cudaEventRecord(start, stream));
}
61+
62+
// Ends the timed region: records `stop` on the stream, waits for it, and
// reports the elapsed time between `start` and `stop` to the benchmark state
// in seconds (cudaEventElapsedTime yields milliseconds).
//
// A destructor is implicitly noexcept, so an exception escaping from CUDA_TRY
// here would call std::terminate. A failed measurement is instead reported to
// the benchmark through SkipWithError, and both events are destroyed on every
// path.
cuda_event_timer::~cuda_event_timer() {
  try {
    CUDA_TRY(cudaEventRecord(stop, stream));
    CUDA_TRY(cudaEventSynchronize(stop));

    float milliseconds = 0.0f;
    CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop));
    p_state->SetIterationTime(milliseconds / (1000.0f));
  } catch (std::runtime_error const& e) {
    p_state->SkipWithError(e.what());
  }

  // Best-effort cleanup: there is no way to report a failure from here, so
  // the status codes are discarded explicitly.
  (void)cudaEventDestroy(start);
  (void)cudaEventDestroy(stop);
}
72+

0 commit comments

Comments
 (0)