From c5cfe6239f64b13239a2a076e72e7b4653cdf2f2 Mon Sep 17 00:00:00 2001 From: Max Ren Date: Tue, 11 Mar 2025 12:54:21 -0700 Subject: [PATCH 1/2] [ExecuTorch][Weight Sharing][XNNPACK] load named data map data for xnnpack If data is serialized into the NamedDataMap, then we overload getConstantDataPtr to retrieve the data from the named data map. This should be done in a Backwards Compatible way. Meaning if no data is serialized into the named data map, then we are still loading the data from the flatbuffer payload. Since the runtime change here is being made before the AoT changes, All CI on this diff by itself should test that the changes made here are backwards compatible. Note: We do not resolve Runtime Memory usage at this point. WeightCache will be implemented in the next diff. Meaning If we load via the same key across different methods, we still pack twice and allocate two instances for the packed weights. Differential Revision: [D70315209](https://our.internmc.facebook.com/intern/diff/D70315209/) [ghstack-poisoned] --- backends/xnnpack/runtime/XNNCompiler.cpp | 49 +++++++++++++++---- backends/xnnpack/runtime/XNNCompiler.h | 1 + backends/xnnpack/runtime/XNNPACKBackend.cpp | 7 ++- .../xnnpack/serialization/runtime_schema.fbs | 9 ++++ backends/xnnpack/targets.bzl | 1 + 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 8d8e9a13152..af959735cd4 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #pragma clang diagnostic ignored "-Wmissing-prototypes" @@ -24,6 +24,8 @@ namespace delegate { using executorch::runtime::Error; using executorch::runtime::MemoryAllocator; using executorch::runtime::Result; +using executorch::runtime::FreeableBuffer; +using executorch::runtime::NamedDataMap; /* * Provide compile-time allocation. 
@@ -48,6 +50,7 @@ class CompileAllocator { using ValuePtr = const fb_xnnpack::XValue*; using NodePtr = const fb_xnnpack::XNode*; using GraphPtr = const fb_xnnpack::XNNGraph*; +using ConstantDataOffsetPtr = const fb_xnnpack::ConstantDataOffset*; using DataType = fb_xnnpack::XNNDatatype; // Type for define node function. This is the function signature @@ -162,7 +165,9 @@ data associated with the tensor value, then returns nullptr. const uint8_t* getConstantDataPtr( const fb_xnnpack::XNNTensorValue* tensor_value, GraphPtr flatbuffer_graph, - const uint8_t* constant_data_ptr) { + const uint8_t* constant_data_ptr, + const NamedDataMap* named_data_map, + std::vector& loaded_buffers_from_map) { auto buffer_idx = tensor_value->constant_buffer_idx(); if (buffer_idx) { if (!constant_data_ptr) { @@ -171,10 +176,23 @@ const uint8_t* getConstantDataPtr( const auto& constant_buffer = *flatbuffer_graph->constant_buffer(); return constant_buffer[buffer_idx]->storage()->data(); } else { - const auto& constant_data_offsets = *flatbuffer_graph->constant_data(); - uint64_t constant_data_offset = - constant_data_offsets[buffer_idx]->offset(); - return constant_data_ptr + constant_data_offset; + ConstantDataOffsetPtr constant_data_offset = flatbuffer_graph->constant_data()->Get(buffer_idx); + uint64_t offset = constant_data_offset->offset(); + + const std::string &data_name = constant_data_offset->named_key()->str(); + // If there is no tensor name + if (data_name.length() == 0) { + return constant_data_ptr + offset; + } else { + Result buffer = named_data_map->get_data(data_name.c_str()); + if (!buffer.ok()) { + ET_LOG(Error, "Failed to get constant data for key %s", data_name.c_str()); + return nullptr; + } + const uint8_t* data_ptr = static_cast(buffer.get().data()); + loaded_buffers_from_map.push_back(std::move(buffer.get())); + return data_ptr; + } } } @@ -194,7 +212,9 @@ Error defineTensor( const uint8_t* constant_data_ptr, std::vector& input_ids, std::vector& output_ids, - 
CompileAllocator& allocator) { + CompileAllocator& allocator, + const NamedDataMap* named_data_map, + std::vector& loaded_buffers_from_map) { const fb_xnnpack::XNNTensorValue* tensor_value = nullptr; const fb_xnnpack::XNNQuantizedTensorValue* qtensor_value = nullptr; @@ -231,8 +251,13 @@ Error defineTensor( // Get Pointer to constant data from flatbuffer, if its non-constant // it is a nullptr - const uint8_t* buffer_ptr = - getConstantDataPtr(tensor_value, flatbuffer_graph, constant_data_ptr); + const uint8_t* buffer_ptr = getConstantDataPtr( + tensor_value, + flatbuffer_graph, + constant_data_ptr, + named_data_map, + loaded_buffers_from_map + ); xnn_status status; // The type we might have to convert to @@ -1968,6 +1993,7 @@ ET_NODISCARD Error XNNCompiler::compileModel( size_t num_bytes, XNNExecutor* executor, MemoryAllocator* runtime_allocator, + const NamedDataMap* named_data_map, xnn_workspace_t workspace) { Result header = XNNHeader::Parse(buffer_pointer, num_bytes); const uint8_t* flatbuffer_data = nullptr; @@ -2036,6 +2062,7 @@ ET_NODISCARD Error XNNCompiler::compileModel( std::vector input_ids; std::vector output_ids; Error err = Error::Ok; + std::vector loaded_buffers_from_map; for (auto value : *flatbuffer_graph->xvalues()) { err = defineTensor( subgraph.get(), @@ -2045,7 +2072,9 @@ ET_NODISCARD Error XNNCompiler::compileModel( constant_data, input_ids, output_ids, - compile_allocator); + compile_allocator, + named_data_map, + loaded_buffers_from_map); if (err != Error::Ok) { return err; diff --git a/backends/xnnpack/runtime/XNNCompiler.h b/backends/xnnpack/runtime/XNNCompiler.h index e66cb791ecb..3ea621a4d59 100644 --- a/backends/xnnpack/runtime/XNNCompiler.h +++ b/backends/xnnpack/runtime/XNNCompiler.h @@ -30,6 +30,7 @@ class XNNCompiler { size_t num_bytes, XNNExecutor* executor, executorch::runtime::MemoryAllocator* runtime_allocator, + const executorch::runtime::NamedDataMap* named_data_map, xnn_workspace_t workspace); }; diff --git 
a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index 1938c5441a5..f453453cf76 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -30,6 +30,7 @@ using executorch::runtime::Error; using executorch::runtime::EValue; using executorch::runtime::FreeableBuffer; using executorch::runtime::Result; +using executorch::runtime::NamedDataMap; class XnnpackBackend final : public ::executorch::runtime::BackendInterface { public: @@ -79,13 +80,14 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface { return Error::MemoryAllocationFailed; } + const NamedDataMap* named_data_map = context.get_named_data_map(); + #ifdef ENABLE_XNNPACK_SHARED_WORKSPACE // This is needed to serialize access to xnn_create_runtime which is not // thread safe. This can heppen when multiple threads call init() on // the same backend instance. const std::lock_guard lock(workspace_mutex_); #endif - // Executor has been allocated but not constructed, ensure that runtime_ is // nullptr by constructing it in place here. NOTE: Since we use placement // new and since this type is not trivially destructible, we must call the @@ -96,6 +98,7 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface { processed->size(), executor, context.get_runtime_allocator(), + named_data_map, workspace_.get()); // This backend does not need its processed data after compiling the model. 
processed->Free(); diff --git a/backends/xnnpack/serialization/runtime_schema.fbs b/backends/xnnpack/serialization/runtime_schema.fbs index 11cb48430ed..75074107c55 100644 --- a/backends/xnnpack/serialization/runtime_schema.fbs +++ b/backends/xnnpack/serialization/runtime_schema.fbs @@ -320,11 +320,20 @@ table XNNLeakyReLU { table ConstantDataOffset { // Constant data offsets are relative to the constant data base offset provided // in the XNNPACKHeader. + // named_key and offset are mutually exclusive, meaning only one of these values + // are valid. If the named key is a non-empty string, then the offset must be UINT64_MAX. + // If the offset is not UINT64_MAX, then the named key must be an empty string offset: uint64; // The size in bytes of valid data starting at the offset. The constant data // may be followed by padding before the next piece of constant data size: uint64; + + // unique string id used to query the offset from the named data store. + // named_key and offset are mutually exclusive, meaning only one of these values + // are valid. If the named key is a non-empty string, then the offset must be UINT64_MAX. 
+ // If the offset is not UINT64_MAX, then the named key must be an empty string + named_key: string; } table XNNGraph { diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl index b89a999bc3d..3fd9c433372 100644 --- a/backends/xnnpack/targets.bzl +++ b/backends/xnnpack/targets.bzl @@ -60,6 +60,7 @@ def define_common_targets(): "//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header", "//executorch/extension/threadpool:threadpool", "//executorch/runtime/core/exec_aten/util:tensor_util", + "//executorch/runtime/executor:pte_data_map" ], # XnnpackBackend.cpp needs to compile with executor as whole # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole) From 1c8ce559df73788334a38d8b8871f7694ae190ef Mon Sep 17 00:00:00 2001 From: Max Ren Date: Tue, 11 Mar 2025 13:24:32 -0700 Subject: [PATCH 2/2] Update on "[ExecuTorch][Weight Sharing][XNNPACK] load named data map data for xnnpack" If data is serialized into the NamedDataMap, then we overload getConstantDataPtr to retrieve the data from the named data map. This should be done in a Backwards Compatible way. Meaning if no data is serialized into the named data map, then we are still loading the data from the flatbuffer payload. Since the runtime change here is being made before the AoT changes, All CI on this diff by itself should test that the changes made here are backwards compatible. Note: We do not resolve Runtime Memory usage at this point. WeightCache will be implemented in the next diff. Meaning If we load via the same key across different methods, we still pack twice and allocate two instances for the packed weights. 
Differential Revision: [D70315209](https://our.internmc.facebook.com/intern/diff/D70315209/) [ghstack-poisoned] --- backends/xnnpack/runtime/XNNCompiler.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index af959735cd4..7b58ceb7739 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -179,11 +179,12 @@ const uint8_t* getConstantDataPtr( ConstantDataOffsetPtr constant_data_offset = flatbuffer_graph->constant_data()->Get(buffer_idx); uint64_t offset = constant_data_offset->offset(); - const std::string &data_name = constant_data_offset->named_key()->str(); + bool has_named_key = flatbuffers::IsFieldPresent(constant_data_offset, fb_xnnpack::ConstantDataOffset::VT_NAMED_KEY); // If there is no tensor name - if (data_name.length() == 0) { + if (!has_named_key) { return constant_data_ptr + offset; } else { + const std::string &data_name = constant_data_offset->named_key()->str(); Result buffer = named_data_map->get_data(data_name.c_str()); if (!buffer.ok()) { ET_LOG(Error, "Failed to get constant data for key %s", data_name.c_str());