From 5d0df2d338a7cf6c53093ded79a07cd05e0836ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lindstr=C3=B6m?=
Date: Tue, 25 Nov 2025 15:52:31 +0100
Subject: [PATCH] Arm backend: Test partial quantization of models on VGF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run tests of partial quantization on the MobileNetV2 and Llama models
using the VGF backend (INT+FP profile).

Signed-off-by: Martin Lindström
Change-Id: I581f1c2be89e15ed186db2189fc507bfa1a82d6a
---
 backends/arm/test/models/test_llama.py        | 19 +++++++++++++++++++
 .../arm/test/models/test_mobilenet_v2_arm.py  | 14 ++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py
index 5f2348dee1e..5791c8bcd75 100644
--- a/backends/arm/test/models/test_llama.py
+++ b/backends/arm/test/models/test_llama.py
@@ -206,3 +206,22 @@ def test_llama_partial_quant_tosa_INT_FP():
         )
         _use_partial_quantizer(pipeline)
         pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_llama_partial_quant_vgf_quant():
+    llama_model, llama_inputs, llama_meta = TestLlama().prepare_model()
+
+    if llama_model is None or llama_inputs is None:
+        pytest.skip("Missing model and/or input files")
+
+    with torch.no_grad():
+        pipeline = VgfPipeline[input_t](
+            llama_model,
+            llama_inputs,
+            aten_op=[],
+            exir_op=[],
+            quantize=True,
+        )
+        _use_partial_quantizer(pipeline)
+        pipeline.run()
diff --git a/backends/arm/test/models/test_mobilenet_v2_arm.py b/backends/arm/test/models/test_mobilenet_v2_arm.py
index 2c5d2cd627d..f2264535ba9 100644
--- a/backends/arm/test/models/test_mobilenet_v2_arm.py
+++ b/backends/arm/test/models/test_mobilenet_v2_arm.py
@@ -163,3 +163,17 @@ def test_mv2_partial_quant_tosa_INT_FP():
     )
     _use_partial_quantizer(pipeline)
     pipeline.run()
+
+
+@common.SkipIfNoModelConverter
+def test_mv2_partial_quant_vgf_quant():
+    pipeline = VgfPipeline[input_t](
+        mv2,
+        model_inputs,
+        aten_op=[],
+        exir_op=[],
+        quantize=True,
+        atol=0.10,
+    )
+    _use_partial_quantizer(pipeline)
+    pipeline.run()