From 1b9eb409c7275fcefd61d39e2657e6e1b7059889 Mon Sep 17 00:00:00 2001
From: Eduard Valeyev
Date: Tue, 25 May 2021 16:56:16 -0400
Subject: [PATCH] cuda callback is prebuf-aware

cudaTaskFn::cuda_callback must flush prebuffer to submit the cleanup task to the worker queues
---
 src/TiledArray/cuda/cuda_task_fn.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/TiledArray/cuda/cuda_task_fn.h b/src/TiledArray/cuda/cuda_task_fn.h
index 98db4def7e..f11df63285 100644
--- a/src/TiledArray/cuda/cuda_task_fn.h
+++ b/src/TiledArray/cuda/cuda_task_fn.h
@@ -131,6 +131,9 @@ struct cudaTaskFn : public TaskInterface {
     // std::string message = "callback on cudaTaskFn: " + address.str() +
     // '\n'; std::cout << message;
     callback->notify();
+    // must flush the prebuf (if any) to make sure this cleanup task will be
+    // actually submitted
+    ThreadPool::instance()->flush_prebuf();
 
     const auto t1 = TiledArray::now();
     TiledArray::detail::cuda_taskfn_callback_duration_ns() +=
@@ -799,10 +802,10 @@ add_cuda_taskfn(
         cudaTaskFn* t) {
   typename cudaTaskFn::futureT res(t->result());
 
-  // add the cuda task
-  world.taskq.add(static_cast(t));
   // add the internal async task in cuda task as well
   world.taskq.add(t->async_task());
+  // add the cuda task
+  world.taskq.add(static_cast(t));
 
   return res;
 }