example (#64)
Summary:
Pull Request resolved: #64

Adding export example for XNNPACK delegated models, also adding to executor runner to run

Differential Revision: D48371417

fbshipit-source-id: b5a4c3b341e0e3e1607a6ac16bf82d0e1ccaac5c
mcr229 authored and facebook-github-bot committed Aug 22, 2023
commit 4a2ae4cc7d972300634d7531abd2028a116630b6
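
For context, a sketch of how the example added below could be invoked, assuming a working Buck2 checkout; the target path is inferred from the new examples/backend/TARGETS file and is not confirmed by this commit:

    # Export mv2 with quantization; writes xnnpack_mv2_quantize.pte to the working directory.
    buck2 run //examples/backend:xnnpack_lowering_examples -- --model_name mv2 --quantize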
13 changes: 13 additions & 0 deletions examples/backend/TARGETS
@@ -0,0 +1,13 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

runtime.python_binary(
    name = "xnnpack_lowering_examples",
    main_src = "xnnpack_lowering_examples.py",
    deps = [
        "//caffe2:torch",
        "//executorch/backends/xnnpack:xnnpack_preprocess",
        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
        "//executorch/examples/models:models",
        "//executorch/exir/backend:backend_api",
    ],
)
100 changes: 100 additions & 0 deletions examples/backend/xnnpack_lowering_examples.py
@@ -0,0 +1,100 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Example script for exporting simple models to flatbuffer

import argparse
import copy

import executorch.exir as exir
import torch._export as export
from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackFloatingPointPartitioner,
    XnnpackQuantizedPartitioner2,
)
from executorch.exir.backend.backend_api import to_backend, validation_disabled

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    get_symmetric_quantization_config,
    XNNPACKQuantizer,
)

from ..models import MODEL_NAME_TO_MODEL

# Note: for mv3, the mul op is not yet supported by XNNPACKQuantizer; support may be added soon.
XNNPACK_MODEL_NAME_TO_MODEL = {
    name: MODEL_NAME_TO_MODEL[name] for name in ["linear", "add", "add_mul", "mv2"]
}


def quantize(model, example_inputs):
    """This is the officially recommended flow for quantization in PyTorch 2.0 export."""
    m = model.eval()
    m = export.capture_pre_autograd_graph(m, copy.deepcopy(example_inputs))
    quantizer = XNNPACKQuantizer()
    # If we set is_per_channel to True, we also need to add an out variant of
    # quantize_per_channel/dequantize_per_channel.
    operator_config = get_symmetric_quantization_config(is_per_channel=False)
    quantizer.set_global(operator_config)
    m = prepare_pt2e(m, quantizer)
    # Calibrate the prepared model on the example inputs.
    m(*example_inputs)
    m = convert_pt2e(m)
    return m


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--model_name",
        required=True,
        help=f"Provide model name. Valid ones: {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}",
    )
    parser.add_argument(
        "-q",
        "--quantize",
        action="store_true",
        required=False,
        default=False,
        help="Produce a quantized model rather than a floating-point one",
    )
    args = parser.parse_args()

    if args.model_name not in XNNPACK_MODEL_NAME_TO_MODEL:
        raise RuntimeError(
            f"Model {args.model_name} is not a valid name or is not quantizable right now. "
            "Please contact the ExecuTorch team if you want to learn why, or how to add "
            "quantization support for the requested model. "
            f"Available models are {list(XNNPACK_MODEL_NAME_TO_MODEL.keys())}."
        )

    model, example_inputs = MODEL_NAME_TO_MODEL[args.model_name]()
    model = model.eval()

    partitioner = XnnpackFloatingPointPartitioner
    if args.quantize:
        print("Quantizing model...")
        model = quantize(model, example_inputs)
        # This will eventually be a single partitioner for both fp32 and quantized models.
        partitioner = XnnpackQuantizedPartitioner2

    edge = exir.capture(
        model, example_inputs, exir.CaptureConfig(enable_aot=True, _unlift=True)
    ).to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
    print("Exported graph:\n", edge.exported_program.graph)

    with validation_disabled():
        edge.exported_program = to_backend(edge.exported_program, partitioner)
    print("Lowered graph:\n", edge.exported_program.graph)

    exec_prog = edge.to_executorch()
    buffer = exec_prog.buffer
    quant_tag = "_quantize" if args.quantize else ""
    filename = f"xnnpack_{args.model_name}{quant_tag}.pte"
    print(f"Saving exported program to {filename}.")
    with open(filename, "wb") as f:
        f.write(buffer)
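
For readers who want to exercise the quantize helper above outside of this CLI, a minimal sketch; torchvision's mobilenet_v2 is an assumption standing in for the repo's mv2 entry, and any eager nn.Module with matching example inputs would do:

    import torch
    import torchvision

    model = torchvision.models.mobilenet_v2()
    example_inputs = (torch.randn(1, 3, 224, 224),)
    quantized = quantize(model, example_inputs)  # prepared, calibrated, and converted via PT2E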
13 changes: 13 additions & 0 deletions examples/executor_runner/targets.bzl
@@ -50,3 +50,16 @@ def define_common_targets():
        define_static_target = True,
        **get_oss_build_kwargs()
    )

    # Executor runner for the XNNPACK backend and portable kernels.
    runtime.cxx_binary(
        name = "xnn_executor_runner",
        srcs = [],
        deps = [
            ":executor_runner_lib",
            "//executorch/backends/xnnpack:xnnpack_backend",
            "//executorch/kernels/portable:generated_lib_all_ops",
        ] + custom_ops_lib,
        define_static_target = True,
        **get_oss_build_kwargs()
    )
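
Once a .pte file has been exported, a sketch of running it through the new runner target; the --model_path flag is an assumption carried over from the existing executor_runner and is not shown in this diff:

    # Run the lowered program produced by the export example above.
    buck2 run //examples/executor_runner:xnn_executor_runner -- --model_path ./xnnpack_mv2_quantize.pte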