From 1dac9271c8890e2e607223619b322b87ce554040 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 02:26:33 -0600 Subject: [PATCH 01/20] Change to using per-thread timers --- include/benchmark/benchmark_api.h | 58 +++--- src/CMakeLists.txt | 7 +- src/benchmark.cc | 285 +++++++++++++----------------- src/cpu_thread_time.cc | 201 +++++++++++++++++++++ src/cpu_thread_time.h | 10 ++ src/mutex.h | 71 +++++--- src/sysinfo.cc | 95 +--------- src/sysinfo.h | 2 - 8 files changed, 412 insertions(+), 317 deletions(-) create mode 100644 src/cpu_thread_time.cc create mode 100644 src/cpu_thread_time.h diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 2357e8848a..ae948b7817 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -270,31 +270,25 @@ enum BigO { // computational complexity for the benchmark. typedef double(BigOFunc)(int); +namespace internal { + class ThreadTimer; + class ThreadManager; +} + // State is passed to a running Benchmark and contains state for the // benchmark to use. class State { public: - State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads); - // Returns true if the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has // returned false. bool KeepRunning() { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { - assert(!finished_); - started_ = true; - ResumeTiming(); + StartKeepRunning(); } bool const res = total_iterations_++ < max_iterations; if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { - assert(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { - PauseTiming(); - } - // Total iterations now is one greater than max iterations. Fix this. - total_iterations_ = max_iterations; - finished_ = true; + FinishKeepRunning(); } return res; } @@ -304,10 +298,11 @@ class State { // Stop the benchmark timer. If not called, the timer will be // automatically stopped after KeepRunning() returns false for the first time. // - // For threaded benchmarks the PauseTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will stop when the last thread has called this function. + // For threaded benchmarks the PauseTiming() function only pauses the timing + // for the current thread. + // + // NOTE: The "real time" measurement is per-thread. If different threads + // report different measurements the largest one is reported. // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided @@ -319,11 +314,6 @@ class State { // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after the first call to KeepRunning() // - // For threaded benchmarks the ResumeTiming() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all active threads have made their ith call. - // The timer will start when the last thread has called this function. - // // NOTE: PauseTiming()/ResumeTiming() are relatively // heavyweight, and so their use should generally be avoided // within each benchmark iteration, if possible. @@ -335,10 +325,10 @@ class State { // thread and report an error with the specified 'msg'. After this call // the user may explicitly 'return' from the benchmark. // - // For threaded benchmarks only the current thread stops executing. If - // multiple threads report an error only the first error message is used. - // The current thread is no longer considered 'active' by - // 'PauseTiming()' and 'ResumingTiming()'. + // For threaded benchmarks only the current thread stops executing and future + // calls to `KeepRunning()` will block until all threads have completed + // the `KeepRunning()` loop. If multiple threads report an error only the + // first error message is used. // // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit // the current scope immediately. If the function is called from within @@ -351,10 +341,8 @@ class State { // is used instead of automatically measured time if UseManualTime() was // specified. // - // For threaded benchmarks the SetIterationTime() function acts - // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all threads have made their ith call. - // The time will be set by the last thread to call this function. + // For threaded benchmarks the final value will be set to the largest + // reported values. void SetIterationTime(double seconds); // Set the number of bytes processed by the current benchmark @@ -465,7 +453,17 @@ class State { const int threads; const size_t max_iterations; + // TODO make me private + State(size_t max_iters, const std::vector& ranges, + int thread_i, int n_threads, + internal::ThreadTimer* timer, + internal::ThreadManager* manager); + private: + void StartKeepRunning(); + void FinishKeepRunning(); + internal::ThreadTimer* timer_; + internal::ThreadManager* manager_; BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 144ff924f8..7a6508de3f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,9 +8,9 @@ endif() # Define the source files set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" - "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" - "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" - "sysinfo.cc" "walltime.cc" "complexity.cc") + "console_reporter.cc" "cpu_thread_time.cc" "csv_reporter.cc" + "json_reporter.cc" "log.cc" "reporter.cc" "sleep.cc" + "string_util.cc" "sysinfo.cc" "walltime.cc" "complexity.cc") # Add headers to the list of source files. cmake does not require this, # but IDEs such as Visual Studio need this to add the headers # to the generated project. @@ -18,6 +18,7 @@ set(_d "${PROJECT_SOURCE_DIR}/include/benchmark") list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h" "${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h" "colorprint.h" "commandlineflags.h" "complexity.h" + "cpu_thread_time.h" "cycleclock.h" "internal_macros.h" "log.h" "mutex.h" "re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "walltime.h") diff --git a/src/benchmark.cc b/src/benchmark.cc index bfac28c23d..be8b291249 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -35,6 +35,7 @@ #include "check.h" #include "commandlineflags.h" #include "complexity.h" +#include "cpu_thread_time.h" #include "log.h" #include "mutex.h" #include "re.h" @@ -141,127 +142,100 @@ static std::atomic error_message = ATOMIC_VAR_INIT(nullptr); // TODO(ericwf): support MallocCounter. //static benchmark::MallocCounter *benchmark_mc; -struct ThreadStats { - ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {} - int64_t bytes_processed; - int64_t items_processed; - int complexity_n; +} // end namespace + +namespace internal { + + +class ThreadManager { +public: + ThreadManager(int num_threads) + : alive_threads_(num_threads), + start_stop_barrier_(num_threads) { + } + + bool startStopBarrier() EXCLUDES(mutex_) { + return start_stop_barrier_.wait(); + } + + void notifyThreadComplete() EXCLUDES(mutex_) { + start_stop_barrier_.removeThread(); + if (--alive_threads_ == 0) { + end_condition_.notify_all(); + } + } + + void waitForAllThreads() EXCLUDES(mutex_) { + MutexLock lock(mutex_); + end_condition_.wait(lock.native_handle(), + [this]() { return alive_threads_ == 0; }); + } + +public: + double real_time_used = 0; + double cpu_time_used = 0; + double manual_time_used = 0; + int64_t bytes_processed = 0; + int64_t items_processed = 0; + int complexity_n = 0; + +private: + Mutex mutex_; + std::atomic alive_threads_; + Barrier start_stop_barrier_; + Condition end_condition_; }; // Timer management class -class TimerManager { +class ThreadTimer { public: - TimerManager(int num_threads, Notification* done) - : num_threads_(num_threads), - running_threads_(num_threads), - done_(done), - running_(false), + ThreadTimer() + : running_(false), real_time_used_(0), cpu_time_used_(0), - manual_time_used_(0), - num_finalized_(0), - phase_number_(0), - entered_(0) + manual_time_used_(0) { } // Called by each thread - void StartTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(!running_) << "Called StartTimer when timer is already running"; - running_ = true; - start_real_time_ = walltime::Now(); - start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } - } - - // Called by each thread - void StopTimer() EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - CHECK(running_) << "Called StopTimer when timer is already stopped"; - InternalStop(); - } - } - if (last_thread) { - phase_condition_.notify_all(); - } + void StartTimer() { + running_ = true; + start_real_time_ = walltime::Now(); + start_cpu_time_ = ThreadCPUUsage(); } // Called by each thread - void SetIterationTime(double seconds) EXCLUDES(lock_) { - bool last_thread = false; - { - MutexLock ml(lock_); - last_thread = Barrier(ml); - if (last_thread) { - manual_time_used_ += seconds; - } - } - if (last_thread) { - phase_condition_.notify_all(); - } + void StopTimer() { + CHECK(running_); + running_ = false; + real_time_used_ += walltime::Now() - start_real_time_; + cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_; } // Called by each thread - void Finalize() EXCLUDES(lock_) { - MutexLock l(lock_); - num_finalized_++; - if (num_finalized_ == num_threads_) { - CHECK(!running_) << - "The timer should be stopped before the timer is finalized"; - done_->Notify(); - } - } - - void RemoveErroredThread() EXCLUDES(lock_) { - MutexLock ml(lock_); - int last_thread = --running_threads_ == 0; - if (last_thread && running_) - InternalStop(); - else if (!last_thread) - phase_condition_.notify_all(); + void SetIterationTime(double seconds) { + manual_time_used_ += seconds; } // REQUIRES: timer is not running - double real_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double real_time_used() { CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running - double cpu_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double cpu_time_used() { CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running - double manual_time_used() EXCLUDES(lock_) { - MutexLock l(lock_); + double manual_time_used() { CHECK(!running_); return manual_time_used_; } private: - Mutex lock_; - Condition phase_condition_; - int num_threads_; - int running_threads_; - Notification* done_; - bool running_; // Is the timer running double start_real_time_; // If running_ double start_cpu_time_; // If running_ @@ -272,52 +246,8 @@ class TimerManager { // Manually set iteration time. User sets this with SetIterationTime(seconds). double manual_time_used_; - // How many threads have called Finalize() - int num_finalized_; - - // State for barrier management - int phase_number_; - int entered_; // Number of threads that have entered this barrier - - void InternalStop() REQUIRES(lock_) { - CHECK(running_); - running_ = false; - real_time_used_ += walltime::Now() - start_real_time_; - cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage()) - - start_cpu_time_); - } - - // Enter the barrier and wait until all other threads have also - // entered the barrier. Returns iff this is the last thread to - // enter the barrier. - bool Barrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, running_threads_); - entered_++; - if (entered_ < running_threads_) { - // Wait for all threads to enter - int phase_number_cp = phase_number_; - auto cb = [this, phase_number_cp]() { - return this->phase_number_ > phase_number_cp || - entered_ == running_threads_; // A thread has aborted in error - }; - phase_condition_.wait(ml.native_handle(), cb); - if (phase_number_ > phase_number_cp) - return false; - // else (running_threads_ == entered_) and we are the last thread. - } - // Last thread has reached the barrier - phase_number_++; - entered_ = 0; - return true; - } }; -// TimerManager for current run. -static std::unique_ptr timer_manager = nullptr; - -} // end namespace - -namespace internal { enum ReportMode : unsigned { RM_Unspecified, // The mode has not been manually specified @@ -802,19 +732,25 @@ namespace { // Adds the stats collected for the thread into *total. void RunInThread(const benchmark::internal::Benchmark::Instance* b, size_t iters, int thread_id, - ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { - State st(iters, b->arg, thread_id, b->threads); + internal::ThreadManager* manager) EXCLUDES(GetBenchmarkLock()) { + internal::ThreadTimer timer; + State st(iters, b->arg, thread_id, b->threads, &timer, manager); b->benchmark->Run(st); + manager->notifyThreadComplete(); + CHECK(st.iterations() == st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(GetBenchmarkLock()); - total->bytes_processed += st.bytes_processed(); - total->items_processed += st.items_processed(); - total->complexity_n += st.complexity_length_n(); + manager->cpu_time_used += timer.cpu_time_used(); + // Take the largest value + manager->real_time_used = std::max(manager->real_time_used, timer.real_time_used()); + manager->manual_time_used = std::max(manager->manual_time_used, timer.manual_time_used()); + manager->bytes_processed += st.bytes_processed(); + manager->items_processed += st.items_processed(); + manager->complexity_n += st.complexity_length_n(); } - timer_manager->Finalize(); } std::vector @@ -826,9 +762,10 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, size_t iters = 1; std::vector pool; - if (b.multithreaded) - pool.resize(b.threads); - + if (b.multithreaded) { + CHECK(b.threads > 1); + pool.resize(b.threads - 1); + } const int repeats = b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; const bool report_aggregates_only = repeats != 1 && @@ -847,10 +784,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } error_message = nullptr; - Notification done; - timer_manager = std::unique_ptr(new TimerManager(b.threads, &done)); - - ThreadStats total; + internal::ThreadManager manager(b.multithreaded ? b.threads : 1); running_benchmark = true; if (b.multithreaded) { // If this is out first iteration of the while(true) loop then the @@ -861,19 +795,17 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, thread.join(); } for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti), &total); + pool[ti] = std::thread(&RunInThread, &b, iters, + static_cast(ti + 1), &manager); } - } else { - // Run directly in this thread - RunInThread(&b, iters, 0, &total); } - done.WaitForNotification(); + RunInThread(&b, iters, 0, &manager); + manager.waitForAllThreads(); running_benchmark = false; - const double cpu_accumulated_time = timer_manager->cpu_time_used(); - const double real_accumulated_time = timer_manager->real_time_used(); - const double manual_accumulated_time = timer_manager->manual_time_used(); - timer_manager.reset(); + const double cpu_accumulated_time = manager.cpu_time_used; + const double real_accumulated_time = manager.real_time_used; + const double manual_accumulated_time = manager.manual_time_used; VLOG(2) << "Ran in " << cpu_accumulated_time << "/" << real_accumulated_time << "\n"; @@ -915,12 +847,12 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, if (!report.error_occurred) { double bytes_per_second = 0; - if (total.bytes_processed > 0 && seconds > 0.0) { - bytes_per_second = (total.bytes_processed / seconds); + if (manager.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (manager.bytes_processed / seconds); } double items_per_second = 0; - if (total.items_processed > 0 && seconds > 0.0) { - items_per_second = (total.items_processed / seconds); + if (manager.items_processed > 0 && seconds > 0.0) { + items_per_second = (manager.items_processed / seconds); } if (b.use_manual_time) { @@ -931,7 +863,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, report.cpu_accumulated_time = cpu_accumulated_time; report.bytes_per_second = bytes_per_second; report.items_per_second = items_per_second; - report.complexity_n = total.complexity_n; + report.complexity_n = manager.complexity_n; report.complexity = b.complexity; report.complexity_lambda = b.complexity_lambda; if(report.complexity != oNone) @@ -982,7 +914,9 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } // namespace State::State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads) + int thread_i, int n_threads, + internal::ThreadTimer* timer, + internal::ThreadManager* manager) : started_(false), finished_(false), total_iterations_(0), range_(ranges), bytes_processed_(0), items_processed_(0), @@ -990,7 +924,9 @@ State::State(size_t max_iters, const std::vector& ranges, error_occurred_(false), thread_index(thread_i), threads(n_threads), - max_iterations(max_iters) + max_iterations(max_iters), + timer_(timer), + manager_(manager) { CHECK(max_iterations != 0) << "At least one iteration must be run"; CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; @@ -1000,13 +936,13 @@ void State::PauseTiming() { // Add in time accumulated so far CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StopTimer(); + timer_->StopTimer(); } void State::ResumeTiming() { CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); - timer_manager->StartTimer(); + timer_->StartTimer(); } void State::SkipWithError(const char* msg) { @@ -1015,15 +951,13 @@ void State::SkipWithError(const char* msg) { error_message_type expected_no_error_msg = nullptr; error_message.compare_exchange_weak(expected_no_error_msg, const_cast(msg)); - started_ = finished_ = true; total_iterations_ = max_iterations; - timer_manager->RemoveErroredThread(); } void State::SetIterationTime(double seconds) { CHECK(running_benchmark); - timer_manager->SetIterationTime(seconds); + timer_->SetIterationTime(seconds); } void State::SetLabel(const char* label) { @@ -1032,6 +966,25 @@ void State::SetLabel(const char* label) { *GetReportLabel() = label; } +void State::StartKeepRunning() { + CHECK(!started_ && !finished_); + started_ = true; + manager_->startStopBarrier(); + if (!error_occurred_) + ResumeTiming(); +} + +void State::FinishKeepRunning() { + CHECK(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { + PauseTiming(); + } + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + finished_ = true; + manager_->startStopBarrier(); +} + namespace internal { namespace { @@ -1234,7 +1187,7 @@ void Initialize(int* argc, char** argv) { internal::SetLogLevel(FLAGS_v); // TODO remove this. It prints some output the first time it is called. // We don't want to have this ouput printed during benchmarking. - MyCPUUsage(); + ProcessCPUUsage(); // The first call to walltime::Now initialized it. Call it once to // prevent the initialization from happening in a benchmark. walltime::Now(); diff --git a/src/cpu_thread_time.cc b/src/cpu_thread_time.cc new file mode 100644 index 0000000000..b4f760d79f --- /dev/null +++ b/src/cpu_thread_time.cc @@ -0,0 +1,201 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_thread_time.h" +#include "internal_macros.h" + +#ifdef BENCHMARK_OS_WINDOWS +#include +#include +#include +#else +#include +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#include +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX +#include +#endif +#if defined(BENCHMARK_OS_MACOSX) +#include +#include +#include +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { +namespace { +std::mutex cputimens_mutex; + +#if defined(BENCHMARK_OS_WINDOWS) +double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { + ULARGE_INTEGER kernel; + ULARGE_INTEGER user; + kernel.HighPart = kernel_time.dwHighDateTime; + kernel.LowPart = kernel_time.dwLowDateTime; + user.HighPart = user_time.dwHighDateTime; + user.LowPart = user_time.dwLowDateTime; + return (static_cast(kernel.QuadPart) + + static_cast(user.QuadPart)) * 1e-7; +} +#else +double MakeTime(struct timespec const& ts) { + return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); +} +double MakeTime(struct rusage ru) { + return (static_cast(ru.ru_utime.tv_sec) + + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); +} +#endif +#if defined(BENCHMARK_OS_MACOSX) +double MakeTime(thread_basic_info_data_t const& info) { + return (static_cast(info.user_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6 + + static_cast(info.system_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6); +} +#endif + +} // end namespace + +// getrusage() based implementation of MyCPUUsage +static double ProcessCPURUsage() { +#if defined(_POSIX_CPUTIME) + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) { + return MakeTime(spec); + } + return 0.0; +#elif !defined(BENCHMARK_OS_WINDOWS) + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) { + return MakeTime(ru); + } + return 0.0; +#else + HANDLE proc = GetCurrentProcess(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); + return MakeTime(kernel_time, user_time); +#endif // OS_WINDOWS +} + +#ifndef BENCHMARK_OS_WINDOWS +static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { + static int cputime_fd = -1; + if (cputime_fd == -1) { + cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); + if (cputime_fd < 0) { + cputime_fd = -1; + return false; + } + } + char buff[64]; + memset(buff, 0, sizeof(buff)); + if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { + close(cputime_fd); + cputime_fd = -1; + return false; + } + unsigned long long result = strtoull(buff, nullptr, 0); + if (result == (std::numeric_limits::max)()) { + close(cputime_fd); + cputime_fd = -1; + return false; + } + *cputime = static_cast(result) / 1e9; + return true; +} + +#endif // OS_WINDOWS + + +double ThreadCPUUsage() { +#if defined(_POSIX_THREAD_CPUTIME) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { + std::cerr << "Failed to get thread CPU time using clock_gettime" << std::endl; + std::exit(EXIT_FAILURE); + } + return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); +#elif defined(BENCHMARK_OS_WINDOWS) + HANDLE this_thread = GetCurrentThread(); + FILETIME creation_time; + FILETIME exit_time; + FILETIME kernel_time; + FILETIME user_time; + GetProcessTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); + return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_MACOSX) + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + thread_basic_info_data_t info; +#else +#error Per-thread timing is not available on your system. +#endif +} + +double ProcessCPUUsage() { +#ifndef BENCHMARK_OS_WINDOWS + { + std::lock_guard l(cputimens_mutex); + static bool use_cputime_ns = true; + if (use_cputime_ns) { + double value; + if (MyCPUUsageCPUTimeNsLocked(&value)) { + return value; + } + // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). + VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; + use_cputime_ns = false; + } + } +#endif // OS_WINDOWS + return ProcessCPURUsage(); +} + +double ChildrenCPUUsage() { +#ifndef BENCHMARK_OS_WINDOWS + struct rusage ru; + if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { + return MakeTime(ru); + } else { + return 0.0; + } +#else + // TODO: Not sure what this even means on Windows + return 0.0; +#endif // OS_WINDOWS +} + +} // end namespace benchmark diff --git a/src/cpu_thread_time.h b/src/cpu_thread_time.h new file mode 100644 index 0000000000..4029cee8ee --- /dev/null +++ b/src/cpu_thread_time.h @@ -0,0 +1,10 @@ +#ifndef BENCHMARK_CPU_THREAD_TIME_H +#define BENCHMARK_CPU_THREAD_TIME_H + +namespace benchmark { +double ProcessCPUUsage(); +double ChildrenCPUUsage(); +double ThreadCPUUsage(); +} // end namespace benchmark + +#endif // BENCHMARK_CPU_THREAD_TIME_H diff --git a/src/mutex.h b/src/mutex.h index f37ec35b3a..6835e8b9c5 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -105,36 +105,63 @@ class SCOPED_CAPABILITY MutexLock MutexLockImp ml_; }; - -class Notification -{ -public: - Notification() : notified_yet_(false) { } - - void WaitForNotification() const EXCLUDES(mutex_) { - MutexLock m_lock(mutex_); - auto notified_fn = [this]() REQUIRES(mutex_) { - return this->HasBeenNotified(); - }; - cv_.wait(m_lock.native_handle(), notified_fn); +class Barrier { + public: + Barrier(int num_threads) + : running_threads_(num_threads) + { } - void Notify() EXCLUDES(mutex_) { + // Called by each thread + bool wait() EXCLUDES(lock_) { + bool last_thread = false; { - MutexLock lock(mutex_); - notified_yet_ = 1; + MutexLock ml(lock_); + last_thread = createBarrier(ml); } - cv_.notify_all(); + if (last_thread) phase_condition_.notify_all(); + return last_thread; } -private: - bool HasBeenNotified() const REQUIRES(mutex_) { - return notified_yet_; + void removeThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + --running_threads_; + if (entered_ != 0) + phase_condition_.notify_all(); } - mutable Mutex mutex_; - mutable std::condition_variable cv_; - bool notified_yet_ GUARDED_BY(mutex_); + private: + Mutex lock_; + Condition phase_condition_; + int running_threads_; + + // State for barrier management + int phase_number_ = 0; + int entered_ = 0; // Number of threads that have entered this barrier + + // Enter the barrier and wait until all other threads have also + // entered the barrier. Returns iff this is the last thread to + // enter the barrier. + bool createBarrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, running_threads_); + entered_++; + if (entered_ < running_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error + }; + phase_condition_.wait(ml.native_handle(), cb); + if (phase_number_ > phase_number_cp) + return false; + // else (running_threads_ == entered_) and we are the last thread. + } + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } }; } // end namespace benchmark diff --git a/src/sysinfo.cc b/src/sysinfo.cc index b5b9851f5a..60e74a295a 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -52,7 +52,6 @@ namespace { std::once_flag cpuinfo_init; double cpuinfo_cycles_per_second = 1.0; int cpuinfo_num_cpus = 1; // Conservative guess -std::mutex cputimens_mutex; #if !defined BENCHMARK_OS_MACOSX const int64_t estimate_time_ms = 1000; @@ -288,101 +287,9 @@ void InitializeSystemInfo() { cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); #endif } -} // end namespace - -// getrusage() based implementation of MyCPUUsage -static double MyCPUUsageRUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - HANDLE proc = GetCurrentProcess(); - FILETIME creation_time; - FILETIME exit_time; - FILETIME kernel_time; - FILETIME user_time; - ULARGE_INTEGER kernel; - ULARGE_INTEGER user; - GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); - kernel.HighPart = kernel_time.dwHighDateTime; - kernel.LowPart = kernel_time.dwLowDateTime; - user.HighPart = user_time.dwHighDateTime; - user.LowPart = user_time.dwLowDateTime; - return (static_cast(kernel.QuadPart) + - static_cast(user.QuadPart)) * 1e-7; -#endif // OS_WINDOWS -} -#ifndef BENCHMARK_OS_WINDOWS -static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { - static int cputime_fd = -1; - if (cputime_fd == -1) { - cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); - if (cputime_fd < 0) { - cputime_fd = -1; - return false; - } - } - char buff[64]; - memset(buff, 0, sizeof(buff)); - if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - unsigned long long result = strtoull(buff, nullptr, 0); - if (result == (std::numeric_limits::max)()) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - *cputime = static_cast(result) / 1e9; - return true; -} -#endif // OS_WINDOWS - -double MyCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - { - std::lock_guard l(cputimens_mutex); - static bool use_cputime_ns = true; - if (use_cputime_ns) { - double value; - if (MyCPUUsageCPUTimeNsLocked(&value)) { - return value; - } - // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). - VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; - use_cputime_ns = false; - } - } -#endif // OS_WINDOWS - return MyCPUUsageRUsage(); -} +} // end namespace -double ChildrenCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { - return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); - } else { - return 0.0; - } -#else - // TODO: Not sure what this even means on Windows - return 0.0; -#endif // OS_WINDOWS -} double CyclesPerSecond(void) { std::call_once(cpuinfo_init, InitializeSystemInfo); diff --git a/src/sysinfo.h b/src/sysinfo.h index eaf77e07cc..c5d9916d2d 100644 --- a/src/sysinfo.h +++ b/src/sysinfo.h @@ -2,8 +2,6 @@ #define BENCHMARK_SYSINFO_H_ namespace benchmark { -double MyCPUUsage(); -double ChildrenCPUUsage(); int NumCPUs(); double CyclesPerSecond(); bool CpuScalingEnabled(); From ffb7b33b511a1ae8ecfa2f821df1631a87b7d46f Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 03:59:12 -0600 Subject: [PATCH 02/20] fix bad assertions --- src/benchmark.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index be8b291249..8afda647f7 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -217,6 +217,8 @@ class ThreadTimer { manual_time_used_ += seconds; } + bool running() const { return running_; } + // REQUIRES: timer is not running double real_time_used() { CHECK(!running_); @@ -763,7 +765,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, std::vector pool; if (b.multithreaded) { - CHECK(b.threads > 1); + CHECK(b.threads >= 1); pool.resize(b.threads - 1); } const int repeats = b.repetitions != 0 ? b.repetitions @@ -952,6 +954,7 @@ void State::SkipWithError(const char* msg) { error_message.compare_exchange_weak(expected_no_error_msg, const_cast(msg)); total_iterations_ = max_iterations; + if (timer_->running()) timer_->StopTimer(); } void State::SetIterationTime(double seconds) From 724ce26bdbe7b3595cca25def66506c5d4225b69 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 04:02:30 -0600 Subject: [PATCH 03/20] fix copy paste error on windows --- src/cpu_thread_time.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpu_thread_time.cc b/src/cpu_thread_time.cc index b4f760d79f..d2f8d89603 100644 --- a/src/cpu_thread_time.cc +++ b/src/cpu_thread_time.cc @@ -155,7 +155,7 @@ double ThreadCPUUsage() { FILETIME exit_time; FILETIME kernel_time; FILETIME user_time; - GetProcessTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); + GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_MACOSX) mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; From 7fea4a2804de7e5d5941622b1d8b737078734ce6 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 14:29:12 -0600 Subject: [PATCH 04/20] Fix thread safety annotations --- CMakeLists.txt | 2 +- src/mutex.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ef8300763..5b5e6b9241 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,7 +152,7 @@ cxx_feature_check(STD_REGEX) cxx_feature_check(GNU_POSIX_REGEX) cxx_feature_check(POSIX_REGEX) cxx_feature_check(STEADY_CLOCK) - +cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) # Ensure we have pthreads find_package(Threads REQUIRED) diff --git a/src/mutex.h b/src/mutex.h index 6835e8b9c5..a801ef0a30 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -4,6 +4,8 @@ #include #include +#include "check.h" + // Enable thread safety attributes only with clang. // The attributes can be safely erased when compiling with other compilers. #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) From 529dac6fb9e486047894555849259a49772ef734 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:24:59 -0600 Subject: [PATCH 05/20] Make null-log thread safe --- src/CMakeLists.txt | 2 +- src/check.h | 4 ++-- src/log.cc | 40 --------------------------------- src/log.h | 55 +++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 52 insertions(+), 49 deletions(-) delete mode 100644 src/log.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7a6508de3f..bbdf024517 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,7 @@ endif() # Define the source files set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" "console_reporter.cc" "cpu_thread_time.cc" "csv_reporter.cc" - "json_reporter.cc" "log.cc" "reporter.cc" "sleep.cc" + "json_reporter.cc" "reporter.cc" "sleep.cc" "string_util.cc" "sysinfo.cc" "walltime.cc" "complexity.cc") # Add headers to the list of source files. cmake does not require this, # but IDEs such as Visual Studio need this to add the headers diff --git a/src/check.h b/src/check.h index 4572babb49..e927b75239 100644 --- a/src/check.h +++ b/src/check.h @@ -33,7 +33,7 @@ class CheckHandler { << check << "' failed. "; } - std::ostream& GetLog() { + LogType& GetLog() { return log_; } @@ -46,7 +46,7 @@ class CheckHandler { CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; private: - std::ostream& log_; + LogType& log_; }; } // end namespace internal diff --git a/src/log.cc b/src/log.cc deleted file mode 100644 index b660309d32..0000000000 --- a/src/log.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "log.h" - -#include - -namespace benchmark { -namespace internal { - -int& LoggingLevelImp() { - static int level = 0; - return level; -} - -void SetLogLevel(int value) { - LoggingLevelImp() = value; -} - -int GetLogLevel() { - return LoggingLevelImp(); -} - -class NullLogBuffer : public std::streambuf -{ -public: - int overflow(int c) { - return c; - } -}; - -std::ostream& GetNullLogInstance() { - static NullLogBuffer log_buff; - static std::ostream null_log(&log_buff); - return null_log; -} - -std::ostream& GetErrorLogInstance() { - return std::clog; -} - -} // end namespace internal -} // end namespace benchmark \ No newline at end of file diff --git a/src/log.h b/src/log.h index 3777810e1c..b34cd5fb3e 100644 --- a/src/log.h +++ b/src/log.h @@ -2,18 +2,61 @@ #define BENCHMARK_LOG_H_ #include +#include + +#include "benchmark/macros.h" namespace benchmark { namespace internal { -int GetLogLevel(); -void SetLogLevel(int level); +typedef std::basic_ostream&(EndLType)(std::basic_ostream&); + +class LogType { + friend LogType& GetNullLogInstance(); + friend LogType& GetErrorLogInstance(); + + // FIXME: Add locking to output. + template + friend LogType& operator<<(LogType&, Tp const&); + friend LogType& operator<<(LogType&, EndLType*); +private: + LogType(std::ostream* out) : out_(out) {} + std::ostream* out_; + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); +}; + +template +LogType& operator<<(LogType& log, Tp const& value) { + if (log.out_) { + *log.out_ << value; + } + return log; +} + +inline LogType& operator<<(LogType& log, EndLType* m) { + if (log.out_) { + *log.out_ << m; + } + return log; +} + +inline int& LogLevel() { + static int log_level = 0; + return log_level; +} -std::ostream& GetNullLogInstance(); -std::ostream& GetErrorLogInstance(); +inline LogType& GetNullLogInstance() { + static LogType log(nullptr); + return log; +} + +inline LogType& GetErrorLogInstance() { + static LogType log(&std::clog); + return log; +} -inline std::ostream& GetLogInstanceForLevel(int level) { - if (level <= GetLogLevel()) { +inline LogType& GetLogInstanceForLevel(int level) { + if (level <= LogLevel()) { return GetErrorLogInstance(); } return GetNullLogInstance(); From 9cd8a88d3caf59702e9d88ae6eadd28d68ea7fa0 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:26:41 -0600 Subject: [PATCH 06/20] remove remaining globals --- src/benchmark.cc | 137 ++++++++++++++++++----------------------------- 1 file changed, 52 insertions(+), 85 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 8afda647f7..5c7b05a5b1 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -93,20 +93,9 @@ namespace internal { void UseCharPointer(char const volatile*) {} -// NOTE: This is a dummy "mutex" type used to denote the actual mutex -// returned by GetBenchmarkLock(). This is only used to placate the thread -// safety warnings by giving the return of GetBenchmarkLock() a name. -struct CAPABILITY("mutex") BenchmarkLockType {}; -BenchmarkLockType BenchmarkLockVar; -} // end namespace internal -inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) -GetBenchmarkLock() -{ - static Mutex lock; - return lock; -} +} // end namespace internal namespace { @@ -121,68 +110,63 @@ static const int kRangeMultiplier = 8; static const size_t kMaxFamilySize = 100; static const size_t kMaxIterations = 1000000000; -bool running_benchmark = false; - -// Global variable so that a benchmark can cause a little extra printing -std::string* GetReportLabel() { - static std::string label GUARDED_BY(GetBenchmarkLock()); - return &label; -} - -// Global variable so that a benchmark can report an error as a human readable -// string. If error_message is null no error occurred. -#if defined(_MSC_VER) && _MSC_VER <= 1800 -typedef char* error_message_type; -#else -typedef const char* error_message_type; -#endif - -static std::atomic error_message = ATOMIC_VAR_INIT(nullptr); - -// TODO(ericwf): support MallocCounter. -//static benchmark::MallocCounter *benchmark_mc; - } // end namespace namespace internal { +// NOTE: This is a dummy "mutex" type used to denote the actual mutex +// returned by getBenchmarkMutex(). This is only used to placate the thread +// safety warnings by giving the return of GetBenchmarkLock() a name. +struct CAPABILITY("mutex") BenchmarkLockType {}; +BenchmarkLockType BenchmarkLockVar; class ThreadManager { public: ThreadManager(int num_threads) : alive_threads_(num_threads), - start_stop_barrier_(num_threads) { + start_stop_barrier_(num_threads) + { + } + + Mutex& getBenchmarkMutex() const + RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) + { + return benchmark_mutex_; } - bool startStopBarrier() EXCLUDES(mutex_) { + bool startStopBarrier() EXCLUDES(end_cond_mutex_) { return start_stop_barrier_.wait(); } - void notifyThreadComplete() EXCLUDES(mutex_) { + void notifyThreadComplete() EXCLUDES(end_cond_mutex_) { start_stop_barrier_.removeThread(); if (--alive_threads_ == 0) { + MutexLock lock(end_cond_mutex_); end_condition_.notify_all(); } } - void waitForAllThreads() EXCLUDES(mutex_) { - MutexLock lock(mutex_); + void waitForAllThreads() EXCLUDES(end_cond_mutex_) { + MutexLock lock(end_cond_mutex_); end_condition_.wait(lock.native_handle(), [this]() { return alive_threads_ == 0; }); } public: - double real_time_used = 0; - double cpu_time_used = 0; - double manual_time_used = 0; - int64_t bytes_processed = 0; - int64_t items_processed = 0; - int complexity_n = 0; - + GUARDED_BY(getBenchmarkMutex()) double real_time_used = 0; + GUARDED_BY(getBenchmarkMutex()) double cpu_time_used = 0; + GUARDED_BY(getBenchmarkMutex()) double manual_time_used = 0; + GUARDED_BY(getBenchmarkMutex()) int64_t bytes_processed = 0; + GUARDED_BY(getBenchmarkMutex()) int64_t items_processed = 0; + GUARDED_BY(getBenchmarkMutex()) int complexity_n = 0; + GUARDED_BY(getBenchmarkMutex()) std::string report_label_; + GUARDED_BY(getBenchmarkMutex()) std::string error_message_; + GUARDED_BY(getBenchmarkMutex()) bool has_error_ = false; private: - Mutex mutex_; + mutable Mutex benchmark_mutex_; std::atomic alive_threads_; Barrier start_stop_barrier_; + Mutex end_cond_mutex_; Condition end_condition_; }; @@ -200,15 +184,16 @@ class ThreadTimer { // Called by each thread void StartTimer() { running_ = true; - start_real_time_ = walltime::Now(); + start_real_time_ = walltime::ChronoClockNow(); start_cpu_time_ = ThreadCPUUsage(); } // Called by each thread void StopTimer() { CHECK(running_); + running_ = false; - real_time_used_ += walltime::Now() - start_real_time_; + real_time_used_ += walltime::ChronoClockNow() - start_real_time_; cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_; } @@ -734,16 +719,14 @@ namespace { // Adds the stats collected for the thread into *total. void RunInThread(const benchmark::internal::Benchmark::Instance* b, size_t iters, int thread_id, - internal::ThreadManager* manager) EXCLUDES(GetBenchmarkLock()) { + internal::ThreadManager* manager) { internal::ThreadTimer timer; State st(iters, b->arg, thread_id, b->threads, &timer, manager); b->benchmark->Run(st); - manager->notifyThreadComplete(); - CHECK(st.iterations() == st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { - MutexLock l(GetBenchmarkLock()); + MutexLock l(manager->getBenchmarkMutex()); manager->cpu_time_used += timer.cpu_time_used(); // Take the largest value manager->real_time_used = std::max(manager->real_time_used, timer.real_time_used()); @@ -752,13 +735,12 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, manager->items_processed += st.items_processed(); manager->complexity_n += st.complexity_length_n(); } - + manager->notifyThreadComplete(); } std::vector RunBenchmark(const benchmark::internal::Benchmark::Instance& b, - std::vector* complexity_reports) - EXCLUDES(GetBenchmarkLock()) { + std::vector* complexity_reports) { std::vector reports; // return value size_t iters = 1; @@ -780,14 +762,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Try benchmark VLOG(2) << "Running " << b.name << " for " << iters << "\n"; - { - MutexLock l(GetBenchmarkLock()); - GetReportLabel()->clear(); - } - error_message = nullptr; - internal::ThreadManager manager(b.multithreaded ? b.threads : 1); - running_benchmark = true; if (b.multithreaded) { // If this is out first iteration of the while(true) loop then the // threads haven't been started and can't be joined. Otherwise we need @@ -803,7 +778,7 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } RunInThread(&b, iters, 0, &manager); manager.waitForAllThreads(); - running_benchmark = false; + MutexLock l(manager.getBenchmarkMutex()); const double cpu_accumulated_time = manager.cpu_time_used; const double real_accumulated_time = manager.real_time_used; @@ -820,19 +795,11 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, seconds = real_accumulated_time; } - std::string label; - { - MutexLock l(GetBenchmarkLock()); - label = *GetReportLabel(); - } - error_message_type error_msg = error_message; - const double min_time = !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; - // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || (error_msg != nullptr) || + if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) || (seconds >= min_time) || (real_accumulated_time >= 5*min_time)) { @@ -840,9 +807,9 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Create report about this benchmark run. BenchmarkReporter::Run report; report.benchmark_name = b.name; - report.error_occurred = error_msg != nullptr; - report.error_message = error_msg != nullptr ? error_msg : ""; - report.report_label = label; + report.error_occurred = manager.has_error_; + report.error_message = manager.error_message_; + report.report_label = manager.report_label_; // Report the total iterations across all threads. report.iterations = static_cast(iters) * b.threads; report.time_unit = b.time_unit; @@ -936,13 +903,11 @@ State::State(size_t max_iters, const std::vector& ranges, void State::PauseTiming() { // Add in time accumulated so far - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); timer_->StopTimer(); } void State::ResumeTiming() { - CHECK(running_benchmark); CHECK(started_ && !finished_ && !error_occurred_); timer_->StartTimer(); } @@ -950,23 +915,25 @@ void State::ResumeTiming() { void State::SkipWithError(const char* msg) { CHECK(msg); error_occurred_ = true; - error_message_type expected_no_error_msg = nullptr; - error_message.compare_exchange_weak(expected_no_error_msg, - const_cast(msg)); + { + MutexLock l(manager_->getBenchmarkMutex()); + if (manager_->has_error_ == false) { + manager_->error_message_ = msg; + manager_->has_error_ = true; + } + } total_iterations_ = max_iterations; if (timer_->running()) timer_->StopTimer(); } void State::SetIterationTime(double seconds) { - CHECK(running_benchmark); timer_->SetIterationTime(seconds); } void State::SetLabel(const char* label) { - CHECK(running_benchmark); - MutexLock l(GetBenchmarkLock()); - *GetReportLabel() = label; + MutexLock l(manager_->getBenchmarkMutex()); + manager_->report_label_ = label; } void State::StartKeepRunning() { @@ -1187,7 +1154,7 @@ int InitializeStreams() { void Initialize(int* argc, char** argv) { internal::ParseCommandLineFlags(argc, argv); - internal::SetLogLevel(FLAGS_v); + internal::LogLevel() = FLAGS_v; // TODO remove this. It prints some output the first time it is called. // We don't want to have this ouput printed during benchmarking. ProcessCPUUsage(); From ebae4a72b4045cf7061be36fe88da7fbaced60fd Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:27:03 -0600 Subject: [PATCH 07/20] use chrono for walltime since it is thread safe --- src/walltime.cc | 19 ------------------- src/walltime.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/walltime.cc b/src/walltime.cc index 4bdbaa5985..b878ad78b3 100644 --- a/src/walltime.cc +++ b/src/walltime.cc @@ -43,25 +43,6 @@ namespace walltime { namespace { -#if defined(HAVE_STEADY_CLOCK) -template -struct ChooseSteadyClock { - typedef std::chrono::high_resolution_clock type; -}; - -template <> -struct ChooseSteadyClock { - typedef std::chrono::steady_clock type; -}; -#endif - -struct ChooseClockType { -#if defined(HAVE_STEADY_CLOCK) - typedef ChooseSteadyClock<>::type type; -#else - typedef std::chrono::high_resolution_clock type; -#endif -}; class WallTimeImp { diff --git a/src/walltime.h b/src/walltime.h index 38c26f3321..3244c71113 100644 --- a/src/walltime.h +++ b/src/walltime.h @@ -1,12 +1,43 @@ #ifndef BENCHMARK_WALLTIME_H_ #define BENCHMARK_WALLTIME_H_ +#include "internal_macros.h" + +#include #include namespace benchmark { typedef double WallTime; namespace walltime { + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +inline double ChronoClockNow() { + typedef ChooseClockType::type ClockType; + using FpSeconds = + std::chrono::duration; + return FpSeconds(ClockType::now().time_since_epoch()).count(); +} + WallTime Now(); } // end namespace walltime From fb608df3466b4ed36bb2cb98f17aa7b2c18bbd27 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:39:50 -0600 Subject: [PATCH 08/20] consolidate timer functions --- src/CMakeLists.txt | 8 +- src/benchmark.cc | 14 +- src/console_reporter.cc | 2 +- src/cpu_thread_time.h | 10 -- src/csv_reporter.cc | 2 +- src/json_reporter.cc | 2 +- src/reporter.cc | 2 +- src/{cpu_thread_time.cc => timers.cc} | 120 ++++++------- src/{walltime.h => timers.h} | 22 +-- src/walltime.cc | 244 -------------------------- 10 files changed, 76 insertions(+), 350 deletions(-) delete mode 100644 src/cpu_thread_time.h rename src/{cpu_thread_time.cc => timers.cc} (74%) rename src/{walltime.h => timers.h} (72%) delete mode 100644 src/walltime.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bbdf024517..f78a1fd3ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,9 +8,9 @@ endif() # Define the source files set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" - "console_reporter.cc" "cpu_thread_time.cc" "csv_reporter.cc" + "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" "reporter.cc" "sleep.cc" - "string_util.cc" "sysinfo.cc" "walltime.cc" "complexity.cc") + "string_util.cc" "sysinfo.cc" "complexity.cc" "timers.cc") # Add headers to the list of source files. cmake does not require this, # but IDEs such as Visual Studio need this to add the headers # to the generated project. @@ -18,10 +18,8 @@ set(_d "${PROJECT_SOURCE_DIR}/include/benchmark") list(APPEND SOURCE_FILES "${_d}/benchmark.h" "${_d}/benchmark_api.h" "${_d}/macros.h" "${_d}/reporter.h" "arraysize.h" "check.h" "colorprint.h" "commandlineflags.h" "complexity.h" - "cpu_thread_time.h" "cycleclock.h" "internal_macros.h" "log.h" "mutex.h" - "re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" - "walltime.h") + "re.h" "sleep.h" "stat.h" "string_util.h" "sysinfo.h" "timers.h") unset(_d) # Determine the correct regular expression engine to use diff --git a/src/benchmark.cc b/src/benchmark.cc index 5c7b05a5b1..e3ef6897de 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -35,14 +35,13 @@ #include "check.h" #include "commandlineflags.h" #include "complexity.h" -#include "cpu_thread_time.h" #include "log.h" #include "mutex.h" #include "re.h" #include "stat.h" #include "string_util.h" #include "sysinfo.h" -#include "walltime.h" +#include "timers.h" DEFINE_bool(benchmark_list_tests, false, "Print a list of benchmarks. This option overrides all other " @@ -184,16 +183,15 @@ class ThreadTimer { // Called by each thread void StartTimer() { running_ = true; - start_real_time_ = walltime::ChronoClockNow(); + start_real_time_ = ChronoClockNow(); start_cpu_time_ = ThreadCPUUsage(); } // Called by each thread void StopTimer() { CHECK(running_); - running_ = false; - real_time_used_ += walltime::ChronoClockNow() - start_real_time_; + real_time_used_ += ChronoClockNow() - start_real_time_; cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_; } @@ -1155,12 +1153,6 @@ int InitializeStreams() { void Initialize(int* argc, char** argv) { internal::ParseCommandLineFlags(argc, argv); internal::LogLevel() = FLAGS_v; - // TODO remove this. It prints some output the first time it is called. - // We don't want to have this ouput printed during benchmarking. - ProcessCPUUsage(); - // The first call to walltime::Now initialized it. Call it once to - // prevent the initialization from happening in a benchmark. - walltime::Now(); } } // end namespace benchmark diff --git a/src/console_reporter.cc b/src/console_reporter.cc index a663b93675..2a35b7870d 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -28,7 +28,7 @@ #include "commandlineflags.h" #include "internal_macros.h" #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { diff --git a/src/cpu_thread_time.h b/src/cpu_thread_time.h deleted file mode 100644 index 4029cee8ee..0000000000 --- a/src/cpu_thread_time.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef BENCHMARK_CPU_THREAD_TIME_H -#define BENCHMARK_CPU_THREAD_TIME_H - -namespace benchmark { -double ProcessCPUUsage(); -double ChildrenCPUUsage(); -double ThreadCPUUsage(); -} // end namespace benchmark - -#endif // BENCHMARK_CPU_THREAD_TIME_H diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index 7bc7ef3dbc..1c137f3f38 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" // File format reference: http://edoceo.com/utilitas/csv-file-format. diff --git a/src/json_reporter.cc b/src/json_reporter.cc index 485d30524a..092b42548d 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -23,7 +23,7 @@ #include #include "string_util.h" -#include "walltime.h" +#include "timers.h" namespace benchmark { diff --git a/src/reporter.cc b/src/reporter.cc index 5187859074..188723f976 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "benchmark/reporter.h" -#include "walltime.h" +#include "timers.h" #include diff --git a/src/cpu_thread_time.cc b/src/timers.cc similarity index 74% rename from src/cpu_thread_time.cc rename to src/timers.cc index d2f8d89603..caa2820a66 100644 --- a/src/cpu_thread_time.cc +++ b/src/timers.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "cpu_thread_time.h" +#include "timers.h" #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -51,7 +51,6 @@ namespace benchmark { namespace { -std::mutex cputimens_mutex; #if defined(BENCHMARK_OS_WINDOWS) double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { @@ -86,21 +85,14 @@ double MakeTime(thread_basic_info_data_t const& info) { } // end namespace -// getrusage() based implementation of MyCPUUsage -static double ProcessCPURUsage() { +double ProcessCPUUsage() { #if defined(_POSIX_CPUTIME) struct timespec spec; if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) { return MakeTime(spec); } return 0.0; -#elif !defined(BENCHMARK_OS_WINDOWS) - struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) { - return MakeTime(ru); - } - return 0.0; -#else +#elif defined(BENCHMARK_OS_WINDOWS) HANDLE proc = GetCurrentProcess(); FILETIME creation_time; FILETIME exit_time; @@ -108,39 +100,29 @@ static double ProcessCPURUsage() { FILETIME user_time; GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); -#endif // OS_WINDOWS +#else + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) { + return MakeTime(ru); + } + return 0.0; +#endif } +double ChildrenCPUUsage() { #ifndef BENCHMARK_OS_WINDOWS -static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { - static int cputime_fd = -1; - if (cputime_fd == -1) { - cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); - if (cputime_fd < 0) { - cputime_fd = -1; - return false; - } - } - char buff[64]; - memset(buff, 0, sizeof(buff)); - if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { - close(cputime_fd); - cputime_fd = -1; - return false; - } - unsigned long long result = strtoull(buff, nullptr, 0); - if (result == (std::numeric_limits::max)()) { - close(cputime_fd); - cputime_fd = -1; - return false; + struct rusage ru; + if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { + return MakeTime(ru); + } else { + return 0.0; } - *cputime = static_cast(result) / 1e9; - return true; +#else + // TODO: Not sure what this even means on Windows + return 0.0; +#endif // BENCHMARK_OS_WINDOWS } -#endif // OS_WINDOWS - - double ThreadCPUUsage() { #if defined(_POSIX_THREAD_CPUTIME) struct timespec ts; @@ -165,37 +147,43 @@ double ThreadCPUUsage() { #endif } -double ProcessCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - { - std::lock_guard l(cputimens_mutex); - static bool use_cputime_ns = true; - if (use_cputime_ns) { - double value; - if (MyCPUUsageCPUTimeNsLocked(&value)) { - return value; - } - // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). - VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; - use_cputime_ns = false; - } - } -#endif // OS_WINDOWS - return ProcessCPURUsage(); -} +namespace { -double ChildrenCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { - return MakeTime(ru); +std::string DateTimeString(bool local) { + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + const std::size_t kStorageSize = 128; + char storage[kStorageSize]; + std::size_t written; + + if (local) { +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); +#else + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::localtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif } else { - return 0.0; - } +#if defined(BENCHMARK_OS_WINDOWS) + written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); #else - // TODO: Not sure what this even means on Windows - return 0.0; -#endif // OS_WINDOWS + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + ::gmtime_r(&now, &timeinfo); + written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); +#endif + } + CHECK(written < kStorageSize); + ((void)written); // prevent unused variable in optimized mode. + return std::string(storage); +} + +} // end namespace + +std::string LocalDateTimeString() { + return DateTimeString(true); } } // end namespace benchmark diff --git a/src/walltime.h b/src/timers.h similarity index 72% rename from src/walltime.h rename to src/timers.h index 3244c71113..912497d1a7 100644 --- a/src/walltime.h +++ b/src/timers.h @@ -1,15 +1,20 @@ -#ifndef BENCHMARK_WALLTIME_H_ -#define BENCHMARK_WALLTIME_H_ - -#include "internal_macros.h" +#ifndef BENCHMARK_TIMERS_H +#define BENCHMARK_TIMERS_H #include #include namespace benchmark { -typedef double WallTime; -namespace walltime { +// Return the CPU usage of the current process +double ProcessCPUUsage(); + +// Return the CPU usage of the children of the current process +double ChildrenCPUUsage(); + +// Return the CPU usage of the current thread +double ThreadCPUUsage(); + #if defined(HAVE_STEADY_CLOCK) template @@ -38,11 +43,8 @@ inline double ChronoClockNow() { return FpSeconds(ClockType::now().time_since_epoch()).count(); } -WallTime Now(); -} // end namespace walltime - std::string LocalDateTimeString(); } // end namespace benchmark -#endif // BENCHMARK_WALLTIME_H_ +#endif // BENCHMARK_TIMERS_H diff --git a/src/walltime.cc b/src/walltime.cc deleted file mode 100644 index b878ad78b3..0000000000 --- a/src/walltime.cc +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark/macros.h" -#include "internal_macros.h" -#include "walltime.h" - -#if defined(BENCHMARK_OS_WINDOWS) -#include -#include // for timeval -#else -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include - -#include "arraysize.h" -#include "check.h" -#include "cycleclock.h" -#include "log.h" -#include "sysinfo.h" - -namespace benchmark { -namespace walltime { - -namespace { - - -class WallTimeImp -{ -public: - WallTime Now(); - - static WallTimeImp& GetWallTimeImp() { - static WallTimeImp* imp = new WallTimeImp(); - return *imp; - } - -private: - WallTimeImp(); - // Helper routines to load/store a float from an AtomicWord. Required because - // g++ < 4.7 doesn't support std::atomic correctly. I cannot wait to - // get rid of this horror show. - void SetDrift(float f) { - int32_t w; - memcpy(&w, &f, sizeof(f)); - std::atomic_store(&drift_adjust_, w); - } - - float GetDrift() const { - float f; - int32_t w = std::atomic_load(&drift_adjust_); - memcpy(&f, &w, sizeof(f)); - return f; - } - - WallTime Slow() const { - struct timeval tv; -#if defined(BENCHMARK_OS_WINDOWS) - FILETIME file_time; - SYSTEMTIME system_time; - ULARGE_INTEGER ularge; - const unsigned __int64 epoch = 116444736000000000LL; - - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - ularge.LowPart = file_time.dwLowDateTime; - ularge.HighPart = file_time.dwHighDateTime; - - tv.tv_sec = (long)((ularge.QuadPart - epoch) / (10L * 1000 * 1000)); - tv.tv_usec = (long)(system_time.wMilliseconds * 1000); -#else - gettimeofday(&tv, nullptr); -#endif - return tv.tv_sec + tv.tv_usec * 1e-6; - } - -private: - static_assert(sizeof(float) <= sizeof(int32_t), - "type sizes don't allow the drift_adjust hack"); - - WallTime base_walltime_; - int64_t base_cycletime_; - int64_t cycles_per_second_; - double seconds_per_cycle_; - uint32_t last_adjust_time_; - std::atomic drift_adjust_; - int64_t max_interval_cycles_; - - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp); -}; - - -WallTime WallTimeImp::Now() { - WallTime now = 0.0; - WallTime result = 0.0; - int64_t ct = 0; - uint32_t top_bits = 0; - do { - ct = cycleclock::Now(); - int64_t cycle_delta = ct - base_cycletime_; - result = base_walltime_ + cycle_delta * seconds_per_cycle_; - - top_bits = static_cast(uint64_t(ct) >> 32); - // Recompute drift no more often than every 2^32 cycles. - // I.e., @2GHz, ~ every two seconds - if (top_bits == last_adjust_time_) { // don't need to recompute drift - return result + GetDrift(); - } - - now = Slow(); - } while (cycleclock::Now() - ct > max_interval_cycles_); - // We are now sure that "now" and "result" were produced within - // kMaxErrorInterval of one another. - - SetDrift(static_cast(now - result)); - last_adjust_time_ = top_bits; - return now; -} - - -WallTimeImp::WallTimeImp() - : base_walltime_(0.0), base_cycletime_(0), - cycles_per_second_(0), seconds_per_cycle_(0.0), - last_adjust_time_(0), drift_adjust_(0), - max_interval_cycles_(0) { - const double kMaxErrorInterval = 100e-6; - cycles_per_second_ = static_cast(CyclesPerSecond()); - CHECK(cycles_per_second_ != 0); - seconds_per_cycle_ = 1.0 / cycles_per_second_; - max_interval_cycles_ = - static_cast(cycles_per_second_ * kMaxErrorInterval); - do { - base_cycletime_ = cycleclock::Now(); - base_walltime_ = Slow(); - } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_); - // We are now sure that "base_walltime" and "base_cycletime" were produced - // within kMaxErrorInterval of one another. - - SetDrift(0.0); - last_adjust_time_ = static_cast(uint64_t(base_cycletime_) >> 32); -} - -WallTime CPUWalltimeNow() { - static WallTimeImp& imp = WallTimeImp::GetWallTimeImp(); - return imp.Now(); -} - -WallTime ChronoWalltimeNow() { - typedef ChooseClockType::type Clock; - typedef std::chrono::duration - FPSeconds; - static_assert(std::chrono::treat_as_floating_point::value, - "This type must be treated as a floating point type."); - auto now = Clock::now().time_since_epoch(); - return std::chrono::duration_cast(now).count(); -} - -bool UseCpuCycleClock() { - bool useWallTime = !CpuScalingEnabled(); - if (useWallTime) { - VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n"; - } else { - VLOG(1) << "Using std::chrono to provide walltime::Now().\n"; - } - return useWallTime; -} - - -} // end anonymous namespace - -// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is -// enabled at the start of the program then std::chrono::system_clock is used -// instead. -WallTime Now() -{ - static bool useCPUClock = UseCpuCycleClock(); - if (useCPUClock) { - return CPUWalltimeNow(); - } else { - return ChronoWalltimeNow(); - } -} - -} // end namespace walltime - - -namespace { - -std::string DateTimeString(bool local) { - typedef std::chrono::system_clock Clock; - std::time_t now = Clock::to_time_t(Clock::now()); - char storage[128]; - std::size_t written; - - if (local) { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::localtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } else { -#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); -#else - std::tm timeinfo; - std::memset(&timeinfo, 0, sizeof(std::tm)); - ::gmtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -#endif - } - CHECK(written < arraysize(storage)); - ((void)written); // prevent unused variable in optimized mode. - return std::string(storage); -} - -} // end namespace - -std::string LocalDateTimeString() { - return DateTimeString(true); -} - -} // end namespace benchmark From 48caee720e2bba407fd52de974767e0a78db9ad4 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:43:27 -0600 Subject: [PATCH 09/20] Add missing ctime include --- src/timers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/timers.cc b/src/timers.cc index caa2820a66..b6ba7e914b 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include From c150ce44e21427e80dcd2d3e441750e19b2b59bc Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:50:43 -0600 Subject: [PATCH 10/20] Rename to be consistent with Google style --- src/benchmark.cc | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index e3ef6897de..1cbb4f55e5 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -114,7 +114,7 @@ static const size_t kMaxIterations = 1000000000; namespace internal { // NOTE: This is a dummy "mutex" type used to denote the actual mutex -// returned by getBenchmarkMutex(). This is only used to placate the thread +// returned by GetBenchmarkMutex(). This is only used to placate the thread // safety warnings by giving the return of GetBenchmarkLock() a name. struct CAPABILITY("mutex") BenchmarkLockType {}; BenchmarkLockType BenchmarkLockVar; @@ -127,17 +127,17 @@ class ThreadManager { { } - Mutex& getBenchmarkMutex() const + Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) { return benchmark_mutex_; } - bool startStopBarrier() EXCLUDES(end_cond_mutex_) { + bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { return start_stop_barrier_.wait(); } - void notifyThreadComplete() EXCLUDES(end_cond_mutex_) { + void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { start_stop_barrier_.removeThread(); if (--alive_threads_ == 0) { MutexLock lock(end_cond_mutex_); @@ -145,22 +145,22 @@ class ThreadManager { } } - void waitForAllThreads() EXCLUDES(end_cond_mutex_) { + void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { MutexLock lock(end_cond_mutex_); end_condition_.wait(lock.native_handle(), [this]() { return alive_threads_ == 0; }); } public: - GUARDED_BY(getBenchmarkMutex()) double real_time_used = 0; - GUARDED_BY(getBenchmarkMutex()) double cpu_time_used = 0; - GUARDED_BY(getBenchmarkMutex()) double manual_time_used = 0; - GUARDED_BY(getBenchmarkMutex()) int64_t bytes_processed = 0; - GUARDED_BY(getBenchmarkMutex()) int64_t items_processed = 0; - GUARDED_BY(getBenchmarkMutex()) int complexity_n = 0; - GUARDED_BY(getBenchmarkMutex()) std::string report_label_; - GUARDED_BY(getBenchmarkMutex()) std::string error_message_; - GUARDED_BY(getBenchmarkMutex()) bool has_error_ = false; + GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; + GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; + GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; + GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; private: mutable Mutex benchmark_mutex_; std::atomic alive_threads_; @@ -724,7 +724,7 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, CHECK(st.iterations() == st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { - MutexLock l(manager->getBenchmarkMutex()); + MutexLock l(manager->GetBenchmarkMutex()); manager->cpu_time_used += timer.cpu_time_used(); // Take the largest value manager->real_time_used = std::max(manager->real_time_used, timer.real_time_used()); @@ -733,7 +733,7 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, manager->items_processed += st.items_processed(); manager->complexity_n += st.complexity_length_n(); } - manager->notifyThreadComplete(); + manager->NotifyThreadComplete(); } std::vector @@ -775,8 +775,8 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } } RunInThread(&b, iters, 0, &manager); - manager.waitForAllThreads(); - MutexLock l(manager.getBenchmarkMutex()); + manager.WaitForAllThreads(); + MutexLock l(manager.GetBenchmarkMutex()); const double cpu_accumulated_time = manager.cpu_time_used; const double real_accumulated_time = manager.real_time_used; @@ -914,7 +914,7 @@ void State::SkipWithError(const char* msg) { CHECK(msg); error_occurred_ = true; { - MutexLock l(manager_->getBenchmarkMutex()); + MutexLock l(manager_->GetBenchmarkMutex()); if (manager_->has_error_ == false) { manager_->error_message_ = msg; manager_->has_error_ = true; @@ -930,14 +930,14 @@ void State::SetIterationTime(double seconds) } void State::SetLabel(const char* label) { - MutexLock l(manager_->getBenchmarkMutex()); + MutexLock l(manager_->GetBenchmarkMutex()); manager_->report_label_ = label; } void State::StartKeepRunning() { CHECK(!started_ && !finished_); started_ = true; - manager_->startStopBarrier(); + manager_->StartStopBarrier(); if (!error_occurred_) ResumeTiming(); } @@ -950,7 +950,7 @@ void State::FinishKeepRunning() { // Total iterations now is one greater than max iterations. Fix this. total_iterations_ = max_iterations; finished_ = true; - manager_->startStopBarrier(); + manager_->StartStopBarrier(); } namespace internal { From 448c797271e750af34750fff2293bfe11d86983d Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 15:54:09 -0600 Subject: [PATCH 11/20] Format patch using clang-format --- include/benchmark/benchmark_api.h | 11 +-- src/benchmark.cc | 148 ++++++++++++++---------------- src/check.h | 6 +- src/log.h | 47 +++++----- src/mutex.h | 15 +-- src/sysinfo.cc | 1 - src/timers.cc | 40 ++++---- src/timers.h | 12 +-- 8 files changed, 131 insertions(+), 149 deletions(-) diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index ae948b7817..7762a42f34 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -271,8 +271,8 @@ enum BigO { typedef double(BigOFunc)(int); namespace internal { - class ThreadTimer; - class ThreadManager; +class ThreadTimer; +class ThreadManager; } // State is passed to a running Benchmark and contains state for the @@ -454,12 +454,11 @@ class State { const size_t max_iterations; // TODO make me private - State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads, - internal::ThreadTimer* timer, + State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, internal::ThreadManager* manager); -private: + private: void StartKeepRunning(); void FinishKeepRunning(); internal::ThreadTimer* timer_; diff --git a/src/benchmark.cc b/src/benchmark.cc index 1cbb4f55e5..193bf80a78 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -92,9 +92,7 @@ namespace internal { void UseCharPointer(char const volatile*) {} - - -} // end namespace internal +} // end namespace internal namespace { @@ -109,7 +107,7 @@ static const int kRangeMultiplier = 8; static const size_t kMaxFamilySize = 100; static const size_t kMaxIterations = 1000000000; -} // end namespace +} // end namespace namespace internal { @@ -120,53 +118,50 @@ struct CAPABILITY("mutex") BenchmarkLockType {}; BenchmarkLockType BenchmarkLockVar; class ThreadManager { -public: - ThreadManager(int num_threads) - : alive_threads_(num_threads), - start_stop_barrier_(num_threads) - { - } - - Mutex& GetBenchmarkMutex() const - RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) - { - return benchmark_mutex_; - } + public: + ThreadManager(int num_threads) + : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} - bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { - return start_stop_barrier_.wait(); - } + Mutex& GetBenchmarkMutex() const + RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) { + return benchmark_mutex_; + } - void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { - start_stop_barrier_.removeThread(); - if (--alive_threads_ == 0) { - MutexLock lock(end_cond_mutex_); - end_condition_.notify_all(); - } - } + bool StartStopBarrier() EXCLUDES(end_cond_mutex_) { + return start_stop_barrier_.wait(); + } - void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { + void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) { + start_stop_barrier_.removeThread(); + if (--alive_threads_ == 0) { MutexLock lock(end_cond_mutex_); - end_condition_.wait(lock.native_handle(), - [this]() { return alive_threads_ == 0; }); + end_condition_.notify_all(); } + } -public: - GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; - GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; - GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; - GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; - GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; - GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; -private: - mutable Mutex benchmark_mutex_; - std::atomic alive_threads_; - Barrier start_stop_barrier_; - Mutex end_cond_mutex_; - Condition end_condition_; + void WaitForAllThreads() EXCLUDES(end_cond_mutex_) { + MutexLock lock(end_cond_mutex_); + end_condition_.wait(lock.native_handle(), + [this]() { return alive_threads_ == 0; }); + } + + public: + GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; + GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; + GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; + GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; + GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; + + private: + mutable Mutex benchmark_mutex_; + std::atomic alive_threads_; + Barrier start_stop_barrier_; + Mutex end_cond_mutex_; + Condition end_condition_; }; // Timer management class @@ -176,9 +171,7 @@ class ThreadTimer { : running_(false), real_time_used_(0), cpu_time_used_(0), - manual_time_used_(0) - { - } + manual_time_used_(0) {} // Called by each thread void StartTimer() { @@ -196,9 +189,7 @@ class ThreadTimer { } // Called by each thread - void SetIterationTime(double seconds) { - manual_time_used_ += seconds; - } + void SetIterationTime(double seconds) { manual_time_used_ += seconds; } bool running() const { return running_; } @@ -727,8 +718,10 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, MutexLock l(manager->GetBenchmarkMutex()); manager->cpu_time_used += timer.cpu_time_used(); // Take the largest value - manager->real_time_used = std::max(manager->real_time_used, timer.real_time_used()); - manager->manual_time_used = std::max(manager->manual_time_used, timer.manual_time_used()); + manager->real_time_used = + std::max(manager->real_time_used, timer.real_time_used()); + manager->manual_time_used = + std::max(manager->manual_time_used, timer.manual_time_used()); manager->bytes_processed += st.bytes_processed(); manager->items_processed += st.items_processed(); manager->complexity_n += st.complexity_length_n(); @@ -736,9 +729,9 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, manager->NotifyThreadComplete(); } -std::vector -RunBenchmark(const benchmark::internal::Benchmark::Instance& b, - std::vector* complexity_reports) { +std::vector RunBenchmark( + const benchmark::internal::Benchmark::Instance& b, + std::vector* complexity_reports) { std::vector reports; // return value size_t iters = 1; @@ -770,8 +763,8 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, thread.join(); } for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, - static_cast(ti + 1), &manager); + pool[ti] = std::thread(&RunInThread, &b, iters, + static_cast(ti + 1), &manager); } } RunInThread(&b, iters, 0, &manager); @@ -797,11 +790,8 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, : FLAGS_benchmark_min_time; // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || manager.has_error_ || - (iters >= kMaxIterations) || - (seconds >= min_time) || - (real_accumulated_time >= 5*min_time)) { - + if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) || + (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) { // Create report about this benchmark run. BenchmarkReporter::Run report; report.benchmark_name = b.name; @@ -880,23 +870,24 @@ RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } // namespace -State::State(size_t max_iters, const std::vector& ranges, - int thread_i, int n_threads, - internal::ThreadTimer* timer, +State::State(size_t max_iters, const std::vector& ranges, int thread_i, + int n_threads, internal::ThreadTimer* timer, internal::ThreadManager* manager) - : started_(false), finished_(false), total_iterations_(0), + : started_(false), + finished_(false), + total_iterations_(0), range_(ranges), - bytes_processed_(0), items_processed_(0), + bytes_processed_(0), + items_processed_(0), complexity_n_(0), error_occurred_(false), thread_index(thread_i), threads(n_threads), max_iterations(max_iters), timer_(timer), - manager_(manager) -{ - CHECK(max_iterations != 0) << "At least one iteration must be run"; - CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; + manager_(manager) { + CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; } void State::PauseTiming() { @@ -914,11 +905,11 @@ void State::SkipWithError(const char* msg) { CHECK(msg); error_occurred_ = true; { - MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->has_error_ == false) { - manager_->error_message_ = msg; - manager_->has_error_ = true; - } + MutexLock l(manager_->GetBenchmarkMutex()); + if (manager_->has_error_ == false) { + manager_->error_message_ = msg; + manager_->has_error_ = true; + } } total_iterations_ = max_iterations; if (timer_->running()) timer_->StopTimer(); @@ -938,8 +929,7 @@ void State::StartKeepRunning() { CHECK(!started_ && !finished_); started_ = true; manager_->StartStopBarrier(); - if (!error_occurred_) - ResumeTiming(); + if (!error_occurred_) ResumeTiming(); } void State::FinishKeepRunning() { diff --git a/src/check.h b/src/check.h index e927b75239..a17799bfef 100644 --- a/src/check.h +++ b/src/check.h @@ -33,9 +33,7 @@ class CheckHandler { << check << "' failed. "; } - LogType& GetLog() { - return log_; - } + LogType& GetLog() { return log_; } BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; @@ -46,7 +44,7 @@ class CheckHandler { CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; private: - LogType& log_; + LogType& log_; }; } // end namespace internal diff --git a/src/log.h b/src/log.h index b34cd5fb3e..e965d36194 100644 --- a/src/log.h +++ b/src/log.h @@ -1,8 +1,8 @@ #ifndef BENCHMARK_LOG_H_ #define BENCHMARK_LOG_H_ -#include #include +#include #include "benchmark/macros.h" @@ -12,14 +12,15 @@ namespace internal { typedef std::basic_ostream&(EndLType)(std::basic_ostream&); class LogType { - friend LogType& GetNullLogInstance(); - friend LogType& GetErrorLogInstance(); - - // FIXME: Add locking to output. - template - friend LogType& operator<<(LogType&, Tp const&); - friend LogType& operator<<(LogType&, EndLType*); -private: + friend LogType& GetNullLogInstance(); + friend LogType& GetErrorLogInstance(); + + // FIXME: Add locking to output. + template + friend LogType& operator<<(LogType&, Tp const&); + friend LogType& operator<<(LogType&, EndLType*); + + private: LogType(std::ostream* out) : out_(out) {} std::ostream* out_; BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); @@ -27,32 +28,32 @@ class LogType { template LogType& operator<<(LogType& log, Tp const& value) { - if (log.out_) { - *log.out_ << value; - } - return log; + if (log.out_) { + *log.out_ << value; + } + return log; } inline LogType& operator<<(LogType& log, EndLType* m) { - if (log.out_) { - *log.out_ << m; - } - return log; + if (log.out_) { + *log.out_ << m; + } + return log; } inline int& LogLevel() { - static int log_level = 0; - return log_level; + static int log_level = 0; + return log_level; } inline LogType& GetNullLogInstance() { - static LogType log(nullptr); - return log; + static LogType log(nullptr); + return log; } inline LogType& GetErrorLogInstance() { - static LogType log(&std::clog); - return log; + static LogType log(&std::clog); + return log; } inline LogType& GetLogInstanceForLevel(int level) { diff --git a/src/mutex.h b/src/mutex.h index a801ef0a30..a6cf7f8072 100644 --- a/src/mutex.h +++ b/src/mutex.h @@ -109,10 +109,7 @@ class SCOPED_CAPABILITY MutexLock class Barrier { public: - Barrier(int num_threads) - : running_threads_(num_threads) - { - } + Barrier(int num_threads) : running_threads_(num_threads) {} // Called by each thread bool wait() EXCLUDES(lock_) { @@ -128,8 +125,7 @@ class Barrier { void removeThread() EXCLUDES(lock_) { MutexLock ml(lock_); --running_threads_; - if (entered_ != 0) - phase_condition_.notify_all(); + if (entered_ != 0) phase_condition_.notify_all(); } private: @@ -139,7 +135,7 @@ class Barrier { // State for barrier management int phase_number_ = 0; - int entered_ = 0; // Number of threads that have entered this barrier + int entered_ = 0; // Number of threads that have entered this barrier // Enter the barrier and wait until all other threads have also // entered the barrier. Returns iff this is the last thread to @@ -152,11 +148,10 @@ class Barrier { int phase_number_cp = phase_number_; auto cb = [this, phase_number_cp]() { return this->phase_number_ > phase_number_cp || - entered_ == running_threads_; // A thread has aborted in error + entered_ == running_threads_; // A thread has aborted in error }; phase_condition_.wait(ml.native_handle(), cb); - if (phase_number_ > phase_number_cp) - return false; + if (phase_number_ > phase_number_cp) return false; // else (running_threads_ == entered_) and we are the last thread. } // Last thread has reached the barrier diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 60e74a295a..483ea8a9e9 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -290,7 +290,6 @@ void InitializeSystemInfo() { } // end namespace - double CyclesPerSecond(void) { std::call_once(cpuinfo_init, InitializeSystemInfo); return cpuinfo_cycles_per_second; diff --git a/src/timers.cc b/src/timers.cc index b6ba7e914b..1be1eae017 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -17,27 +17,27 @@ #ifdef BENCHMARK_OS_WINDOWS #include -#include #include +#include #else #include #include -#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX #include #endif #if defined(BENCHMARK_OS_MACOSX) #include -#include #include +#include #endif #endif #include -#include #include +#include #include #include #include @@ -62,7 +62,8 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { user.HighPart = user_time.dwHighDateTime; user.LowPart = user_time.dwLowDateTime; return (static_cast(kernel.QuadPart) + - static_cast(user.QuadPart)) * 1e-7; + static_cast(user.QuadPart)) * + 1e-7; } #else double MakeTime(struct timespec const& ts) { @@ -70,17 +71,17 @@ double MakeTime(struct timespec const& ts) { } double MakeTime(struct rusage ru) { return (static_cast(ru.ru_utime.tv_sec) + - static_cast(ru.ru_utime.tv_usec) * 1e-6 + - static_cast(ru.ru_stime.tv_sec) + - static_cast(ru.ru_stime.tv_usec) * 1e-6); + static_cast(ru.ru_utime.tv_usec) * 1e-6 + + static_cast(ru.ru_stime.tv_sec) + + static_cast(ru.ru_stime.tv_usec) * 1e-6); } #endif #if defined(BENCHMARK_OS_MACOSX) double MakeTime(thread_basic_info_data_t const& info) { return (static_cast(info.user_time.seconds) + - static_cast(info.user_time.microseconds) * 1e-6 + - static_cast(info.system_time.seconds) + - static_cast(info.user_time.microseconds) * 1e-6); + static_cast(info.user_time.microseconds) * 1e-6 + + static_cast(info.system_time.seconds) + + static_cast(info.user_time.microseconds) * 1e-6); } #endif @@ -128,7 +129,8 @@ double ThreadCPUUsage() { #if defined(_POSIX_THREAD_CPUTIME) struct timespec ts; if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { - std::cerr << "Failed to get thread CPU time using clock_gettime" << std::endl; + std::cerr << "Failed to get thread CPU time using clock_gettime" + << std::endl; std::exit(EXIT_FAILURE); } return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); @@ -138,7 +140,8 @@ double ThreadCPUUsage() { FILETIME exit_time; FILETIME kernel_time; FILETIME user_time; - GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); + GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, + &user_time); return MakeTime(kernel_time, user_time); #elif defined(BENCHMARK_OS_MACOSX) mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; @@ -159,7 +162,8 @@ std::string DateTimeString(bool local) { if (local) { #if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); + written = + std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); #else std::tm timeinfo; std::memset(&timeinfo, 0, sizeof(std::tm)); @@ -177,14 +181,12 @@ std::string DateTimeString(bool local) { #endif } CHECK(written < kStorageSize); - ((void)written); // prevent unused variable in optimized mode. + ((void)written); // prevent unused variable in optimized mode. return std::string(storage); } -} // end namespace +} // end namespace -std::string LocalDateTimeString() { - return DateTimeString(true); -} +std::string LocalDateTimeString() { return DateTimeString(true); } } // end namespace benchmark diff --git a/src/timers.h b/src/timers.h index 912497d1a7..65606ccd93 100644 --- a/src/timers.h +++ b/src/timers.h @@ -15,16 +15,15 @@ double ChildrenCPUUsage(); // Return the CPU usage of the current thread double ThreadCPUUsage(); - #if defined(HAVE_STEADY_CLOCK) template struct ChooseSteadyClock { - typedef std::chrono::high_resolution_clock type; + typedef std::chrono::high_resolution_clock type; }; template <> struct ChooseSteadyClock { - typedef std::chrono::steady_clock type; + typedef std::chrono::steady_clock type; }; #endif @@ -37,10 +36,9 @@ struct ChooseClockType { }; inline double ChronoClockNow() { - typedef ChooseClockType::type ClockType; - using FpSeconds = - std::chrono::duration; - return FpSeconds(ClockType::now().time_since_epoch()).count(); + typedef ChooseClockType::type ClockType; + using FpSeconds = std::chrono::duration; + return FpSeconds(ClockType::now().time_since_epoch()).count(); } std::string LocalDateTimeString(); From 9130bea90aacc6d75bfa9d1125898ab092e45587 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:04:02 -0600 Subject: [PATCH 12/20] cleanup -Wthread-safety configuration --- CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b5e6b9241..86f614298c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,8 +87,7 @@ else() add_cxx_compiler_flag(-Wstrict-aliasing) endif() add_cxx_compiler_flag(-Wthread-safety) - if (HAVE_WTHREAD_SAFETY) - add_definitions(-DHAVE_WTHREAD_SAFETY) + if (HAVE_CXX_FLAG_WTHREAD_SAFETY) cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) endif() @@ -152,7 +151,6 @@ cxx_feature_check(STD_REGEX) cxx_feature_check(GNU_POSIX_REGEX) cxx_feature_check(POSIX_REGEX) cxx_feature_check(STEADY_CLOCK) -cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) # Ensure we have pthreads find_package(Threads REQUIRED) From d2cbeacbc05c901eff62a6d93f03c1d2825fe889 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:10:29 -0600 Subject: [PATCH 13/20] Don't trust _POSIX_FEATURE macros because OS X lies. --- src/timers.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/timers.cc b/src/timers.cc index 1be1eae017..8b29c90a3f 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -88,7 +88,7 @@ double MakeTime(thread_basic_info_data_t const& info) { } // end namespace double ProcessCPUUsage() { -#if defined(_POSIX_CPUTIME) +#if defined(CLOCK_PROCESS_CPUTIME_ID) struct timespec spec; if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) { return MakeTime(spec); @@ -126,7 +126,7 @@ double ChildrenCPUUsage() { } double ThreadCPUUsage() { -#if defined(_POSIX_THREAD_CPUTIME) +#if defined(CLOCK_THREAD_CPUTIME_ID) struct timespec ts; if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { std::cerr << "Failed to get thread CPU time using clock_gettime" From ef4640b95cf981f5ed0497b672a46d43dd14b7bc Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:18:03 -0600 Subject: [PATCH 14/20] Fix OS X thread timings --- src/timers.cc | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/timers.cc b/src/timers.cc index 8b29c90a3f..c9d7e583da 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -51,8 +51,14 @@ #include "string_util.h" namespace benchmark { -namespace { +// Suppress unused warnings on helper functions. +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +namespace { #if defined(BENCHMARK_OS_WINDOWS) double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { ULARGE_INTEGER kernel; @@ -87,6 +93,10 @@ double MakeTime(thread_basic_info_data_t const& info) { } // end namespace +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + double ProcessCPUUsage() { #if defined(CLOCK_PROCESS_CPUTIME_ID) struct timespec spec; @@ -146,6 +156,16 @@ double ThreadCPUUsage() { #elif defined(BENCHMARK_OS_MACOSX) mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; + mach_port_t thread = mach_thread_self(); + kern_return_t kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count); + if (kr == KERN_SUCCESS && (info.flags & TH_FLAGS_IDLE) == 0) { + double t = MakeTime(info); + mach_port_deallocate(mach_task_self(), thread); + return t; + } else { + std::cerr << "Could not retrieve thread info" << std::endl; + std::exit(EXIT_FAILURE); + } #else #error Per-thread timing is not available on your system. #endif From 2a6e747f3356b027d6062f5175482d25ef714c92 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:26:12 -0600 Subject: [PATCH 15/20] attempt to fix mingw build --- src/timers.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/timers.cc b/src/timers.cc index c9d7e583da..2a3d379a93 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -19,12 +19,15 @@ #include #include #include -#else +#endif + +#if !defined(BENCHMARK_OS_WINDOWS) || defined(__GNUC__) // handles mingw #include #include #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include +#endif #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX #include #endif @@ -33,7 +36,6 @@ #include #include #endif -#endif #include #include From 2ed3c3c56289513e5fcebf58e53bbca7e4da16fb Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:29:15 -0600 Subject: [PATCH 16/20] Attempt to make mingw work again --- src/timers.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/timers.cc b/src/timers.cc index 2a3d379a93..2aee0b4d2b 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -73,7 +73,9 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { static_cast(user.QuadPart)) * 1e-7; } -#else +#endif + +#if !defined(BENCHMARK_OS_WINDOWS) || defined(__GNUC__) double MakeTime(struct timespec const& ts) { return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); } @@ -84,6 +86,7 @@ double MakeTime(struct rusage ru) { static_cast(ru.ru_stime.tv_usec) * 1e-6); } #endif + #if defined(BENCHMARK_OS_MACOSX) double MakeTime(thread_basic_info_data_t const& info) { return (static_cast(info.user_time.seconds) + From 3e683c518306d9d1d31bfe18bb12487e345fad4e Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 16:50:55 -0600 Subject: [PATCH 17/20] Revert old mingw workaround --- src/timers.cc | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/timers.cc b/src/timers.cc index 2aee0b4d2b..c9d7e583da 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -19,15 +19,12 @@ #include #include #include -#endif - -#if !defined(BENCHMARK_OS_WINDOWS) || defined(__GNUC__) // handles mingw +#else #include #include #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include -#endif #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX #include #endif @@ -36,6 +33,7 @@ #include #include #endif +#endif #include #include @@ -73,9 +71,7 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) { static_cast(user.QuadPart)) * 1e-7; } -#endif - -#if !defined(BENCHMARK_OS_WINDOWS) || defined(__GNUC__) +#else double MakeTime(struct timespec const& ts) { return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); } @@ -86,7 +82,6 @@ double MakeTime(struct rusage ru) { static_cast(ru.ru_stime.tv_usec) * 1e-6); } #endif - #if defined(BENCHMARK_OS_MACOSX) double MakeTime(thread_basic_info_data_t const& info) { return (static_cast(info.user_time.seconds) + From e8858c55613dd28ec65987a73086fe15e09b4a42 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 17:01:07 -0600 Subject: [PATCH 18/20] improve diagnostics --- src/timers.cc | 61 +++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/src/timers.cc b/src/timers.cc index c9d7e583da..b0e701a7d4 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -91,6 +91,11 @@ double MakeTime(thread_basic_info_data_t const& info) { } #endif +static BENCHMARK_NORETURN void DiagnoseAndExit(const char* msg) { + std::cerr << "ERROR: " << msg << std::endl; + std::exit(EXIT_FAILURE); +} + } // end namespace #if defined(__GNUC__) @@ -98,53 +103,30 @@ double MakeTime(thread_basic_info_data_t const& info) { #endif double ProcessCPUUsage() { -#if defined(CLOCK_PROCESS_CPUTIME_ID) - struct timespec spec; - if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) { - return MakeTime(spec); - } - return 0.0; -#elif defined(BENCHMARK_OS_WINDOWS) +#if defined(BENCHMARK_OS_WINDOWS) HANDLE proc = GetCurrentProcess(); FILETIME creation_time; FILETIME exit_time; FILETIME kernel_time; FILETIME user_time; - GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); - return MakeTime(kernel_time, user_time); + if (GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time)) + return MakeTime(kernel_time, user_time); + DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(CLOCK_PROCESS_CPUTIME_ID) + struct timespec spec; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) + return MakeTime(spec); + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); #else struct rusage ru; - if (getrusage(RUSAGE_SELF, &ru) == 0) { + if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru); - } - return 0.0; + DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed"); #endif } -double ChildrenCPUUsage() { -#ifndef BENCHMARK_OS_WINDOWS - struct rusage ru; - if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { - return MakeTime(ru); - } else { - return 0.0; - } -#else - // TODO: Not sure what this even means on Windows - return 0.0; -#endif // BENCHMARK_OS_WINDOWS -} - double ThreadCPUUsage() { -#if defined(CLOCK_THREAD_CPUTIME_ID) - struct timespec ts; - if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) { - std::cerr << "Failed to get thread CPU time using clock_gettime" - << std::endl; - std::exit(EXIT_FAILURE); - } - return ts.tv_sec + (static_cast(ts.tv_nsec) * 1e-9); -#elif defined(BENCHMARK_OS_WINDOWS) +#if defined(BENCHMARK_OS_WINDOWS) HANDLE this_thread = GetCurrentThread(); FILETIME creation_time; FILETIME exit_time; @@ -153,6 +135,11 @@ double ThreadCPUUsage() { GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); +#elif defined(CLOCK_THREAD_CPUTIME_ID) + struct timespec ts; + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) + return MakeTime(ts); + DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed"); #elif defined(BENCHMARK_OS_MACOSX) mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; @@ -162,10 +149,8 @@ double ThreadCPUUsage() { double t = MakeTime(info); mach_port_deallocate(mach_task_self(), thread); return t; - } else { - std::cerr << "Could not retrieve thread info" << std::endl; - std::exit(EXIT_FAILURE); } + DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); #else #error Per-thread timing is not available on your system. #endif From 25d56592c7734d27451a85c3f8691bfb91d980d4 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 17:35:06 -0600 Subject: [PATCH 19/20] Drastically improve OS X measurements --- src/timers.cc | 12 +++++------- tools/gbench/report.py | 9 +++++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/timers.cc b/src/timers.cc index b0e701a7d4..95e6ceb55a 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -91,7 +91,7 @@ double MakeTime(thread_basic_info_data_t const& info) { } #endif -static BENCHMARK_NORETURN void DiagnoseAndExit(const char* msg) { +BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) { std::cerr << "ERROR: " << msg << std::endl; std::exit(EXIT_FAILURE); } @@ -143,12 +143,10 @@ double ThreadCPUUsage() { #elif defined(BENCHMARK_OS_MACOSX) mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; thread_basic_info_data_t info; - mach_port_t thread = mach_thread_self(); - kern_return_t kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count); - if (kr == KERN_SUCCESS && (info.flags & TH_FLAGS_IDLE) == 0) { - double t = MakeTime(info); - mach_port_deallocate(mach_task_self(), thread); - return t; + mach_port_t thread = pthread_mach_thread_np(pthread_self()); + if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t) &info, &count) + == KERN_SUCCESS) { + return MakeTime(info); } DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info"); #else diff --git a/tools/gbench/report.py b/tools/gbench/report.py index 44fa4a5632..ac69b9bef6 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -59,6 +59,10 @@ def calculate_change(old_val, new_val): """ Return a float representing the decimal change between old_val and new_val. """ + if old_val == 0 and new_val == 0: + return 0.0 + if old_val == 0: + return float(new_val - old_val) / (float(old_val + new_val) / 2) return float(new_val - old_val) / abs(old_val) @@ -73,7 +77,7 @@ def find_test(name): if b['name'] == name: return b return None - first_line = "{:<{}s} Time CPU".format( + first_line = "{:<{}s} Time CPU Old New".format( 'Benchmark', first_col_width) output_strs = [first_line, '-' * len(first_line)] for bn in json1['benchmarks']: @@ -88,12 +92,13 @@ def get_color(res): return BC_WHITE else: return BC_CYAN - fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc}" + fmt_str = "{}{:<{}s}{endc} {}{:+.2f}{endc} {}{:+.2f}{endc} {:4d} {:4d}" tres = calculate_change(bn['real_time'], other_bench['real_time']) cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) output_strs += [color_format(use_color, fmt_str, BC_HEADER, bn['name'], first_col_width, get_color(tres), tres, get_color(cpures), cpures, + bn['cpu_time'], other_bench['cpu_time'], endc=BC_ENDC)] return output_strs From 7bc016b6b752271570f850069525000ccd881e35 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 2 Sep 2016 20:48:22 -0600 Subject: [PATCH 20/20] Use average real time instead of max --- src/benchmark.cc | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 193bf80a78..f0f0a43d90 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -717,11 +717,8 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, { MutexLock l(manager->GetBenchmarkMutex()); manager->cpu_time_used += timer.cpu_time_used(); - // Take the largest value - manager->real_time_used = - std::max(manager->real_time_used, timer.real_time_used()); - manager->manual_time_used = - std::max(manager->manual_time_used, timer.manual_time_used()); + manager->real_time_used += timer.real_time_used(); + manager->manual_time_used += timer.manual_time_used(); manager->bytes_processed += st.bytes_processed(); manager->items_processed += st.items_processed(); manager->complexity_n += st.complexity_length_n(); @@ -735,12 +732,10 @@ std::vector RunBenchmark( std::vector reports; // return value size_t iters = 1; - + const int num_threads = b.multithreaded ? b.threads : 1; std::vector pool; - if (b.multithreaded) { - CHECK(b.threads >= 1); - pool.resize(b.threads - 1); - } + if (num_threads > 1) pool.resize(num_threads -1); + const int repeats = b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; const bool report_aggregates_only = repeats != 1 && @@ -753,7 +748,7 @@ std::vector RunBenchmark( // Try benchmark VLOG(2) << "Running " << b.name << " for " << iters << "\n"; - internal::ThreadManager manager(b.multithreaded ? b.threads : 1); + internal::ThreadManager manager(num_threads); if (b.multithreaded) { // If this is out first iteration of the while(true) loop then the // threads haven't been started and can't be joined. Otherwise we need @@ -772,8 +767,8 @@ std::vector RunBenchmark( MutexLock l(manager.GetBenchmarkMutex()); const double cpu_accumulated_time = manager.cpu_time_used; - const double real_accumulated_time = manager.real_time_used; - const double manual_accumulated_time = manager.manual_time_used; + const double real_accumulated_time = manager.real_time_used / num_threads; + const double manual_accumulated_time = manager.manual_time_used / num_threads; VLOG(2) << "Ran in " << cpu_accumulated_time << "/" << real_accumulated_time << "\n";