-
Notifications
You must be signed in to change notification settings - Fork 1k
Arm backend: Add event profiling to VGF backend #19703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c104cf6
af840eb
073f52b
83d5245
49b36b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,13 +6,20 @@ | |
| */ | ||
|
|
||
| #include <cinttypes> | ||
| #include <list> | ||
| #include <numeric> | ||
|
|
||
| using namespace std; | ||
|
|
||
| #include <c10/util/safe_numerics.h> | ||
| #include <executorch/runtime/backend/interface.h> | ||
| #include <executorch/runtime/core/error.h> | ||
| #include <executorch/runtime/core/evalue.h> | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| #include <executorch/runtime/core/event_tracer_hooks_delegate.h> | ||
| #endif | ||
|
|
||
| using executorch::aten::Tensor; | ||
| using executorch::runtime::ArrayRef; | ||
| using executorch::runtime::Backend; | ||
|
|
@@ -27,6 +34,13 @@ using executorch::runtime::MemoryAllocator; | |
| using executorch::runtime::Result; | ||
| using executorch::runtime::Span; | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| using executorch::runtime::event_tracer_end_profiling_delegate; | ||
| using executorch::runtime::event_tracer_start_profiling_delegate; | ||
| using executorch::runtime::EventTracer; | ||
| using executorch::runtime::EventTracerEntry; | ||
| #endif | ||
|
|
||
| // We use the platform and runtime environment provided by the Vulkan delegate | ||
| #include <executorch/backends/vulkan/runtime/vk_api/vk_api.h> | ||
|
|
||
|
|
@@ -69,7 +83,8 @@ VkResult vkml_allocate_basics( | |
| VkPhysicalDevice* physical_device, | ||
| VkDevice* device, | ||
| VkQueue* queue, | ||
| VkCommandPool* command_pool); | ||
| VkCommandPool* command_pool, | ||
| uint32_t* queue_family_index); | ||
|
|
||
| void vkml_free_basics( | ||
| VkInstance* instance, | ||
|
|
@@ -104,7 +119,8 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
| &vk_physical_device, | ||
| &vk_device, | ||
| &vk_queue, | ||
| &vk_command_pool); | ||
| &vk_command_pool, | ||
| &vk_queue_family_index); | ||
| if (result != VK_SUCCESS) { | ||
| ET_LOG( | ||
| Error, "Failed to initialize the Vulkan device error 0x%08X", result); | ||
|
|
@@ -142,8 +158,31 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
| ArrayRef<CompileSpec> compile_specs) const override { | ||
| ET_LOG(Info, "Entered VGF init"); | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| EventTracer* event_tracer = context.event_tracer(); | ||
|
|
||
| EventTracerEntry init_total_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_INIT_TOTAL", | ||
| /*delegate_debug_id=*/-1); | ||
|
|
||
| EventTracerEntry ensure_initialized_event = | ||
| event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_INIT_ENSURE_INITIALIZED", | ||
| /*delegate_debug_id=*/-1); | ||
| #endif | ||
|
|
||
| const_cast<VGFBackend*>(this)->ensure_initialized(); | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, ensure_initialized_event); | ||
| #endif | ||
|
|
||
| if (!is_initialized_) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, init_total_event); | ||
| #endif | ||
| ET_LOG( | ||
| Error, | ||
| "VGF backend is unavailable because Vulkan initialization failed"); | ||
|
|
@@ -152,30 +191,89 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
|
|
||
| const char* vgf_data = reinterpret_cast<const char*>(processed->data()); | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| EventTracerEntry allocate_repr_event = | ||
| event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_INIT_ALLOCATE_REPR", | ||
| /*delegate_debug_id=*/-1); | ||
| #endif | ||
|
|
||
| MemoryAllocator* allocator = context.get_runtime_allocator(); | ||
| VgfRepr* repr = allocator->allocateInstance<VgfRepr>(); | ||
| new (repr) VgfRepr( | ||
| vk_instance, vk_physical_device, vk_device, vk_queue, vk_command_pool); | ||
| vk_instance, | ||
| vk_physical_device, | ||
| vk_device, | ||
| vk_queue, | ||
| vk_command_pool, | ||
| vk_queue_family_index); | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, allocate_repr_event); | ||
|
|
||
| EventTracerEntry process_vgf_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_INIT_PROCESS_VGF_BACKEND", | ||
| /*delegate_debug_id=*/-1); | ||
| #endif | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| auto valid_vgf = repr->process_vgf( | ||
| vgf_data, processed->size(), compile_specs, event_tracer); | ||
| #else | ||
| auto valid_vgf = | ||
| repr->process_vgf(vgf_data, processed->size(), compile_specs); | ||
| #endif | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, process_vgf_event); | ||
| #endif | ||
|
|
||
| if (!valid_vgf) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, init_total_event); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you see nested profiling events leading to some noise? I.e. the time spent in recording the inner profiling event measured in the outer one? I understand this is needed for chrometrace etc. Just want to make sure this record overhead isn't too much.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, i didn't see it |
||
| #endif | ||
| ET_LOG(Error, "Failed to process VGF blob."); | ||
| return Error::Internal; | ||
| } | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, init_total_event); | ||
| #endif | ||
|
|
||
| return repr; | ||
| } | ||
|
|
||
| Error execute( | ||
| ET_UNUSED BackendExecutionContext& context, | ||
| BackendExecutionContext& context, | ||
| DelegateHandle* handle, | ||
| Span<EValue*> args) const override { | ||
| VgfRepr* repr = static_cast<VgfRepr*>(handle); | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| EventTracer* event_tracer = context.event_tracer(); | ||
|
|
||
| EventTracerEntry vgf_execute_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_EXECUTE", | ||
| /*delegate_debug_id=*/-1); | ||
|
|
||
| EventTracerEntry copy_inputs_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_COPY_INPUTS", | ||
| /*delegate_debug_id=*/-1); | ||
| #else | ||
| (void)context; | ||
| #endif | ||
|
|
||
| // Copy all inputs from EValue to VkDeviceMemory | ||
| for (int i = 0; i < repr->IOs.size(); i++) { | ||
| if (!args[i]->isTensor()) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_inputs_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
| ET_LOG( | ||
| Error, | ||
| "Expected EValue %d to be tensor, got %d", | ||
|
|
@@ -206,22 +304,59 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
|
|
||
| void* data; | ||
| if (!repr->map_io(io, &data)) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_inputs_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
| ET_LOG(Error, "Failed to map Vulkan IO memory"); | ||
| return Error::Internal; | ||
| } | ||
| memcpy(data, tensor->mutable_data_ptr(), io_size); | ||
| repr->unmap_io(io); | ||
| } | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_inputs_event); | ||
|
|
||
| EventTracerEntry dispatch_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_DISPATCH_AND_WAIT", | ||
| /*delegate_debug_id=*/-1); | ||
| #endif | ||
|
|
||
| // Execute the workload | ||
| if (!repr->execute_vgf()) { | ||
| bool execute_ok = false; | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| execute_ok = repr->execute_vgf(event_tracer); | ||
| #else | ||
| execute_ok = repr->execute_vgf(); | ||
| #endif | ||
|
|
||
| if (!execute_ok) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, dispatch_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
| ET_LOG(Error, "Failed to execute the VGF representation"); | ||
| return Error::Internal; | ||
| } | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, dispatch_event); | ||
|
|
||
| EventTracerEntry copy_outputs_event = event_tracer_start_profiling_delegate( | ||
| event_tracer, | ||
| "VGF_COPY_OUTPUTS", | ||
| /*delegate_debug_id=*/-1); | ||
| #endif | ||
|
|
||
| // Copy all outputs from VKDeviceMemory to EValue | ||
| for (int i = 0; i < repr->IOs.size(); i++) { | ||
| if (!args[i]->isTensor()) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_outputs_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
| ET_LOG( | ||
| Error, | ||
| "Expected EValue %d to be tensor, got %d", | ||
|
|
@@ -251,13 +386,22 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
|
|
||
| void* data; | ||
| if (!repr->map_io(io, &data)) { | ||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_outputs_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
| ET_LOG(Error, "Failed to map Vulkan IO memory"); | ||
| return Error::Internal; | ||
| } | ||
| memcpy(tensor->mutable_data_ptr(), data, io_size); | ||
| repr->unmap_io(io); | ||
| } | ||
|
|
||
| #ifdef ET_EVENT_TRACER_ENABLED | ||
| event_tracer_end_profiling_delegate(event_tracer, copy_outputs_event); | ||
| event_tracer_end_profiling_delegate(event_tracer, vgf_execute_event); | ||
| #endif | ||
|
|
||
| return Error::Ok; | ||
| } | ||
|
|
||
|
|
@@ -272,6 +416,7 @@ class VGFBackend final : public ::executorch::runtime::BackendInterface { | |
| VkDevice vk_device = VK_NULL_HANDLE; | ||
| VkQueue vk_queue = VK_NULL_HANDLE; | ||
| VkCommandPool vk_command_pool = VK_NULL_HANDLE; | ||
| uint32_t vk_queue_family_index = UINT32_MAX; | ||
| bool is_initialized_ = false; | ||
| }; | ||
|
|
||
|
|
@@ -286,7 +431,8 @@ VkResult vkml_allocate_basics( | |
| VkPhysicalDevice* physical_device, | ||
| VkDevice* device, | ||
| VkQueue* queue, | ||
| VkCommandPool* command_pool) { | ||
| VkCommandPool* command_pool, | ||
| uint32_t* queue_family_index) { | ||
| VkResult result; | ||
|
|
||
| if (VK_SUCCESS != volkInitialize()) { | ||
|
|
@@ -408,6 +554,9 @@ VkResult vkml_allocate_basics( | |
| ET_LOG(Error, "Failed to find suitable queue"); | ||
| return VK_ERROR_UNKNOWN; | ||
| } | ||
| if (queue_family_index != nullptr) { | ||
| *queue_family_index = qf; | ||
| } | ||
|
|
||
| // Device with ML tensor extension | ||
| float qp = 1.0f; | ||
|
|
@@ -544,4 +693,4 @@ VkResult vkml_allocate_basics( | |
|
|
||
| } // namespace vgf | ||
| } // namespace backends | ||
| } // namespace executorch | ||
| } // namespace executorch | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you planning to expand this to hardware performance counters like occupancy going beyond time measurements, and chrometrace.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not at the moment