diff --git a/backends/xnnpack/runtime/XNNExecutor.cpp b/backends/xnnpack/runtime/XNNExecutor.cpp index 103a8812931..1cba33a91e6 100644 --- a/backends/xnnpack/runtime/XNNExecutor.cpp +++ b/backends/xnnpack/runtime/XNNExecutor.cpp @@ -23,6 +23,28 @@ using executorch::runtime::is_contiguous_dim_order; using executorch::runtime::kTensorDimensionLimit; using executorch::runtime::Span; +namespace { +class InUseGuard { + public: + explicit InUseGuard(std::atomic& flag) : flag_(flag) {} + ~InUseGuard() { + if (!dismissed_) { + flag_.store(false, std::memory_order_release); + } + } + void dismiss() { + dismissed_ = true; + } + + InUseGuard(const InUseGuard&) = delete; + InUseGuard& operator=(const InUseGuard&) = delete; + + private: + std::atomic& flag_; + bool dismissed_ = false; +}; +} // namespace + /** * Initializes the XNNExecutor with the runtime and given number of * inputs/outputs externals_ is resized to the total number of inputs and @@ -71,6 +93,21 @@ ET_NODISCARD Error XNNExecutor::initialize( * delegate->execute() */ ET_NODISCARD Error XNNExecutor::prepare_args(Span args) { + ET_CHECK_MSG( + !destroyed_.load(std::memory_order_acquire), + "XNNExecutor::prepare_args called after destroy"); + + bool was_in_use = in_use_.exchange(true, std::memory_order_acquire); + if (was_in_use) { + ET_LOG(Error, "XNNExecutor::prepare_args called concurrently"); + } + ET_DCHECK_MSG(!was_in_use, "XNNExecutor::prepare_args called concurrently"); + + InUseGuard in_use_guard(in_use_); + if (was_in_use) { + in_use_guard.dismiss(); + } + ET_CHECK_OR_RETURN_ERROR( runtime_ != nullptr, Internal, @@ -142,6 +179,7 @@ ET_NODISCARD Error XNNExecutor::prepare_args(Span args) { return err; } + in_use_guard.dismiss(); return Error::Ok; } @@ -152,6 +190,8 @@ ET_NODISCARD Error XNNExecutor::prepare_args(Span args) { * After which we then execute the runtime through invoke_runtime. */ ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) { + InUseGuard in_use_guard(in_use_); + ET_CHECK_OR_RETURN_ERROR( runtime_ != nullptr, Internal, @@ -160,11 +200,13 @@ ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) { xnn_status status = xnn_setup_runtime_v2( runtime_.get(), externals_.size(), externals_.data()); - ET_CHECK_OR_RETURN_ERROR( - status == xnn_status_success, - Internal, - "Internal Error: Setting up the runtime failed with code: %s", - xnn_status_to_string(status)); + if (status != xnn_status_success) { + ET_LOG( + Error, + "Internal Error: Setting up the runtime failed with code: %s", + xnn_status_to_string(status)); + return Error::Internal; + } auto error = profiler_.start(context.event_tracer()); if (error != Error::Ok) { diff --git a/backends/xnnpack/runtime/XNNExecutor.h b/backends/xnnpack/runtime/XNNExecutor.h index fa7c8360be4..0af8b6056b0 100644 --- a/backends/xnnpack/runtime/XNNExecutor.h +++ b/backends/xnnpack/runtime/XNNExecutor.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -36,11 +37,20 @@ class XNNExecutor { std::vector externals_; std::vector packed_data_names_; std::shared_ptr workspace_; + std::atomic in_use_{false}; + std::atomic destroyed_{false}; public: XNNExecutor(std::shared_ptr workspace) : workspace_(workspace) {} + ~XNNExecutor() { + ET_CHECK_MSG( + !in_use_.load(std::memory_order_acquire), + "XNNExecutor destroyed while in use"); + destroyed_.store(true, std::memory_order_release); + } + inline size_t getNumInputs() { return input_ids_.size(); } diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index c20fa985f46..a02cf98771b 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -129,6 +130,17 @@ class XnnpackBackend final Error, "XNNCompiler::compileModel failed: 0x%x", (unsigned int)err); return err; } + + ET_LOG( + Info, + "XnnpackBackend::init delegate=%p workspace_id=%" PRIu64 + " workspace_ptr=%p program_id=0x%" PRIxPTR " weight_cache=%s", + (void*)executor, + workspace->id(), + (void*)workspace_ptr, + program_id, + use_weight_cache ? "true" : "false"); + return executor; } @@ -138,13 +150,23 @@ class XnnpackBackend final Span args) const override { auto executor = static_cast(handle); + auto workspace = executor->get_workspace(); + ET_LOG( + Info, + "XnnpackBackend::execute begin delegate=%p workspace_id=%" PRIu64 + " num_args=%zu weight_cache=%s", + (void*)executor, + workspace->id(), + (size_t)args.size(), + executor->uses_weight_cache() ? "true" : "false"); + std::unique_lock lock_weights_cache( weights_cache_mutex_, std::defer_lock); if (executor->uses_weight_cache()) { lock_weights_cache.lock(); } - auto [raii_lock, _] = executor->get_workspace()->acquire(); + auto [raii_lock, _] = workspace->acquire(); // Prepare Inputs/Outputs and Propagate Input Shapes Error err = executor->prepare_args(args); @@ -161,20 +183,36 @@ class XnnpackBackend final // Convert output data types if necessary (e.g., int32 -> int64 for Long) err = executor->convert_outputs(args); + ET_LOG( + Info, + "XnnpackBackend::execute end delegate=%p workspace_id=%" PRIu64 + " err=0x%x", + (void*)executor, + workspace->id(), + (unsigned int)err); + return err; } void destroy(DelegateHandle* handle) const override { if (handle != nullptr) { auto executor = static_cast(handle); + auto workspace = executor->get_workspace(); + + ET_LOG( + Info, + "XnnpackBackend::destroy delegate=%p workspace_id=%" PRIu64, + (void*)executor, + workspace->id()); + + const std::lock_guard lock_weights_cache( + weights_cache_mutex_); #ifdef ENABLE_XNNPACK_PROFILING executor->print_avg_op_timings(); #endif if (executor->uses_weight_cache()) { - const std::lock_guard lock_weights_cache( - weights_cache_mutex_); weights_cache_->delete_packed_data(executor->get_packed_data_names()); } @@ -183,7 +221,6 @@ class XnnpackBackend final // the same backend instance. Make sure to hold onto the workspace // shared_ptr, as the pointer in the executor is freed, which includes // the mutex referenced by raii_lock. - auto workspace = executor->get_workspace(); auto [raii_lock, _] = workspace->acquire(); // XNNExecutor is not trivially destructible. Since this was constructed diff --git a/backends/xnnpack/runtime/XNNWorkspaceManager.cpp b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp index d3550da5cc7..e115074a108 100644 --- a/backends/xnnpack/runtime/XNNWorkspaceManager.cpp +++ b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp @@ -61,7 +61,9 @@ XNNWorkspaceManager::get_or_create_workspace( return create_result.error(); } +#ifndef XNNPACK_WORKSPACE_ALWAYS_LOCK create_result.get()->disable_locking(); +#endif return create_result.get(); } else if (mode == WorkspaceSharingMode::PerModel) { return get_or_create_model_workspace(program_id); diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl index 868e68e5b8c..b3af589df10 100644 --- a/backends/xnnpack/targets.bzl +++ b/backends/xnnpack/targets.bzl @@ -14,6 +14,8 @@ def _get_preprocessor_flags(): if native.read_config("executorch", "xnnpack_weights_cache", "0") != "0": preprocessor_flags.append("-DENABLE_XNNPACK_WEIGHTS_CACHE") + preprocessor_flags.append("-DXNNPACK_WORKSPACE_ALWAYS_LOCK") + # Enable if not disabled through config return preprocessor_flags diff --git a/backends/xnnpack/test/runtime/test_workspace_manager.cpp b/backends/xnnpack/test/runtime/test_workspace_manager.cpp index a7689966635..a239d19b415 100644 --- a/backends/xnnpack/test/runtime/test_workspace_manager.cpp +++ b/backends/xnnpack/test/runtime/test_workspace_manager.cpp @@ -116,7 +116,11 @@ TEST_F(XNNWorkspaceManagerTest, DisabledModeAcquireDoesNotLock) { auto [lock, ptr] = workspace->acquire(); ASSERT_NE(ptr, nullptr); +#ifdef XNNPACK_WORKSPACE_ALWAYS_LOCK + EXPECT_TRUE(lock.owns_lock()); +#else EXPECT_FALSE(lock.owns_lock()); +#endif } TEST_F(XNNWorkspaceManagerTest, PerModelMode) { diff --git a/backends/xnnpack/test/targets.bzl b/backends/xnnpack/test/targets.bzl index 812986a12e6..d690e1c9dcd 100644 --- a/backends/xnnpack/test/targets.bzl +++ b/backends/xnnpack/test/targets.bzl @@ -96,6 +96,9 @@ def define_common_targets(): runtime.cxx_test( name = "test_workspace_manager", srcs = ["runtime/test_workspace_manager.cpp"], + preprocessor_flags = [ + "-DXNNPACK_WORKSPACE_ALWAYS_LOCK", + ], deps = [ third_party_dep("XNNPACK"), "//executorch/backends/xnnpack:xnnpack_backend",