From f6b2fa6db9cc7cc90347701e364a96f2eadbe43b Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Tue, 16 Apr 2024 10:32:45 -0700
Subject: [PATCH] Add quantized op support to llama runner

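Summary:
Link quantized_ops_lib and quantized_kernels into the llama runner when
EXECUTORCH_BUILD_QUANTIZED is set, add both libraries to lib_list in
build/executorch-config.cmake, and build the quantized ops AOT library only
when the new EXECUTORCH_BUILD_QUANTIZED_OPS_AOT option is enabled. The CI
script and the quant_lib.py build hint are updated to pass the new option.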

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

[ghstack-poisoned]
---
 .ci/scripts/test_quantized_aot_lib.sh | 2 +-
 build/executorch-config.cmake         | 2 +-
 examples/models/llama2/CMakeLists.txt | 6 ++++++
 examples/models/llama2/quant_lib.py   | 2 +-
 kernels/quantized/CMakeLists.txt      | 5 ++++-
 5 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/.ci/scripts/test_quantized_aot_lib.sh b/.ci/scripts/test_quantized_aot_lib.sh
index ed9c789c5e4..0ab9ceb81a7 100755
--- a/.ci/scripts/test_quantized_aot_lib.sh
+++ b/.ci/scripts/test_quantized_aot_lib.sh
@@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() {
     && retry cmake -DBUCK2=buck2 \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-      -DEXECUTORCH_BUILD_QUANTIZED=ON \
+      -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
       -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
 
   cmake --build ${CMAKE_OUTPUT_DIR} -j4
diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake
index acf8b6779d5..60c8ebda5e6 100644
--- a/build/executorch-config.cmake
+++ b/build/executorch-config.cmake
@@ -38,7 +38,7 @@ set(lib_list
     etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate
     qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend
     XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas
-    optimized_ops_lib optimized_native_cpu_ops_lib
+    optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib
 )
 foreach(lib ${lib_list})
     # Name of the variable which stores result of the find_library search
diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt
index 0735b5331e8..68332f24b49 100644
--- a/examples/models/llama2/CMakeLists.txt
+++ b/examples/models/llama2/CMakeLists.txt
@@ -91,6 +91,7 @@ add_subdirectory(runner)
 if(EXECUTORCH_USE_TIKTOKEN)
   # find RE2 for tokenizer
   set(ABSL_ENABLE_INSTALL ON)
+  set(ABSL_PROPAGATE_CXX_STD ON)
   set(_pic_flag
     ${CMAKE_POSITION_INDEPENDENT_CODE})
   set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -118,6 +119,11 @@ else()
   target_link_options_shared_lib(portable_ops_lib)
 endif()
 
+if(EXECUTORCH_BUILD_QUANTIZED)
+  list(APPEND link_libraries quantized_ops_lib quantized_kernels)
+  target_link_options_shared_lib(quantized_ops_lib)
+endif()
+
 if(EXECUTORCH_BUILD_CUSTOM)
   target_link_options_shared_lib(custom_ops)
   list(APPEND link_libraries custom_ops)
diff --git a/examples/models/llama2/quant_lib.py b/examples/models/llama2/quant_lib.py
index 226f10421b9..c7453248b7d 100644
--- a/examples/models/llama2/quant_lib.py
+++ b/examples/models/llama2/quant_lib.py
@@ -105,7 +105,7 @@ def check_embedding_byte_registered():
                     'Use `python -c "import torch as _; print(_.__path__)"` to find where torch package is installed.\n'
                     "Set that as TORCH_PACKAGE_DIR.\n"
                     "Then from root executorch dir do the following:\n"
-                    "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2=<path-to-buck2> -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED=ON ..) && cmake --build . -j16\n"
+                    "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2=<path-to-buck2> -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON ..) && cmake --build . -j16\n"
                     'To find the location of the lib: find cmake-out -name "libquantized_ops_aot_lib*"\n'
                     "Then specify the said library via -s <path to libquantized_ops_aot_lib.so\n"
                 )
diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt
index 7be9e73827f..b34ba75ae29 100644
--- a/kernels/quantized/CMakeLists.txt
+++ b/kernels/quantized/CMakeLists.txt
@@ -10,6 +10,9 @@
 # ~~~
 cmake_minimum_required(VERSION 3.19)
 
+option(EXECUTORCH_BUILD_QUANTIZED_OPS_AOT
+       "Build the quantized ops library for AOT export usage" OFF)
+
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if(NOT CMAKE_CXX_STANDARD)
   set(CMAKE_CXX_STANDARD 17)
@@ -49,7 +52,7 @@ message("Generated files ${gen_command_sources}")
 # quantized_ops_aot_lib quantized_ops_lib but none of these is a common
 # dependency of the other(s). This is not allowed by the Xcode "new build
 # system".
-if(NOT CMAKE_GENERATOR STREQUAL "Xcode")
+if(NOT CMAKE_GENERATOR STREQUAL "Xcode" AND EXECUTORCH_BUILD_QUANTIZED_OPS_AOT)
   # Build a AOT library to register quantized ops into PyTorch. This is a hack.
   set(_quantized_sources
       ${_quantized_kernels__srcs}