From f6b2fa6db9cc7cc90347701e364a96f2eadbe43b Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 16 Apr 2024 10:32:45 -0700 Subject: [PATCH] Add quantized op support to llama runner Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] --- .ci/scripts/test_quantized_aot_lib.sh | 2 +- build/executorch-config.cmake | 2 +- examples/models/llama2/CMakeLists.txt | 6 ++++++ examples/models/llama2/quant_lib.py | 2 +- kernels/quantized/CMakeLists.txt | 5 ++++- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.ci/scripts/test_quantized_aot_lib.sh b/.ci/scripts/test_quantized_aot_lib.sh index ed9c789c5e4..0ab9ceb81a7 100755 --- a/.ci/scripts/test_quantized_aot_lib.sh +++ b/.ci/scripts/test_quantized_aot_lib.sh @@ -24,7 +24,7 @@ build_cmake_quantized_aot_lib() { && retry cmake -DBUCK2=buck2 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \ - -DEXECUTORCH_BUILD_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \ -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..) cmake --build ${CMAKE_OUTPUT_DIR} -j4 diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake index acf8b6779d5..60c8ebda5e6 100644 --- a/build/executorch-config.cmake +++ b/build/executorch-config.cmake @@ -38,7 +38,7 @@ set(lib_list etdump bundled_program extension_data_loader ${FLATCCRT_LIB} mpsdelegate qnn_executorch_backend portable_ops_lib extension_module xnnpack_backend XNNPACK cpuinfo pthreadpool vulkan_backend optimized_kernels cpublas eigen_blas - optimized_ops_lib optimized_native_cpu_ops_lib + optimized_ops_lib optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib ) foreach(lib ${lib_list}) # Name of the variable which stores result of the find_library search diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt index 0735b5331e8..68332f24b49 100644 --- a/examples/models/llama2/CMakeLists.txt +++ b/examples/models/llama2/CMakeLists.txt @@ -91,6 +91,7 @@ add_subdirectory(runner) if(EXECUTORCH_USE_TIKTOKEN) # find RE2 for tokenizer set(ABSL_ENABLE_INSTALL ON) + set(ABSL_PROPAGATE_CXX_STD ON) set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -118,6 +119,11 @@ else() target_link_options_shared_lib(portable_ops_lib) endif() +if(EXECUTORCH_BUILD_QUANTIZED) + list(APPEND link_libraries quantized_ops_lib quantized_kernels) + target_link_options_shared_lib(quantized_ops_lib) +endif() + if(EXECUTORCH_BUILD_CUSTOM) target_link_options_shared_lib(custom_ops) list(APPEND link_libraries custom_ops) diff --git a/examples/models/llama2/quant_lib.py b/examples/models/llama2/quant_lib.py index 226f10421b9..c7453248b7d 100644 --- a/examples/models/llama2/quant_lib.py +++ b/examples/models/llama2/quant_lib.py @@ -105,7 +105,7 @@ def check_embedding_byte_registered(): 'Use `python -c "import torch as _; print(_.__path__)"` to find where torch package is installed.\n' "Set that as TORCH_PACKAGE_DIR.\n" "Then from root executorch dir do the following:\n" - "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2= -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED=ON ..) && cmake --build . -j16\n" + "rm -rf cmake-out && mkdir cmake-out && (cd cmake-out && cmake -DBUCK2= -DCMAKE_PREFIX_PATH=$TORCH_PACKAGE_DIR -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON ..) && cmake --build . -j16\n" 'To find the location of the lib: find cmake-out -name "libquantized_ops_aot_lib*"\n' "Then specify the said library via -s