diff --git a/backends/arm/public_api_manifests/api_manifest_running.toml b/backends/arm/public_api_manifests/api_manifest_running.toml
index 0b096102100..f01128058e6 100644
--- a/backends/arm/public_api_manifests/api_manifest_running.toml
+++ b/backends/arm/public_api_manifests/api_manifest_running.toml
@@ -62,7 +62,7 @@ signature = "EthosUPartitioner.register_custom_partition_op(self, op: torch._ops
 
 [python.EthosUQuantizer]
 kind = "class"
-signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.EthosUQuantizer.annotate]
 kind = "function"
@@ -146,7 +146,7 @@ signature = "VgfPartitioner.register_custom_partition_op(self, op: torch._ops.Op
 
 [python.VgfQuantizer]
 kind = "class"
-signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'"
+signature = "VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'"
 
 [python.VgfQuantizer.annotate]
 kind = "function"
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index f1dfb5f1323..9bf6c3530e2 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -470,21 +470,23 @@ class TOSAQuantizer(Quantizer):
     """Manage quantization annotations for TOSA-compatible backends.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     """
 
     def __init__(
         self,
         compile_spec_or_tosa_spec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         """Create a TOSA quantizer from a TOSA spec or Arm compile spec.
 
         .. warning::
-            Setting ``use_composable_quantizer=True`` enables an experimental
-            API surface that may change without notice.
+            The composable quantizer is now the default implementation.
+            Setting ``use_composable_quantizer=False`` is deprecated and will
+            be removed in two minor releases.
 
         """
         self.use_composable_quantizer = use_composable_quantizer
@@ -496,7 +498,7 @@ def __init__(
             self.quantizer = _TOSAQuantizerV2(compile_spec_or_tosa_spec)
         else:
             logger.info(
-                "Using default quantizer in the arm backend. This quantizer is planned to be replaced by the composable quantizer implementation in the future, see https://github.com/pytorch/executorch/issues/17701"
+                "Using deprecated legacy quantizer implementation in the arm backend. Setting use_composable_quantizer=False will be removed in two minor releases. See https://github.com/pytorch/executorch/issues/17701"
             )
             self.quantizer = _TOSAQuantizerV1(compile_spec_or_tosa_spec)
 
@@ -1239,20 +1241,25 @@ class EthosUQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Ethos-U backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (EthosUCompileSpec): Backend compile specification for
             Ethos-U targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+            for details.
 
     """
 
     def __init__(
         self,
         compile_spec: EthosUCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
 
@@ -1261,19 +1268,24 @@ class VgfQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Vgf backend.
 
     .. warning::
-        Setting ``use_composable_quantizer=True`` enables an experimental API
-        surface that may change without notice.
+        The composable quantizer is now the default implementation. Setting
+        ``use_composable_quantizer=False`` is deprecated and will be removed in
+        two minor releases.
 
     Args:
         compile_spec (VgfCompileSpec): Backend compile specification for Vgf
             targets.
-        use_composable_quantizer (bool): Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+        use_composable_quantizer (bool): Whether to use the composable
+            quantizer implementation. Setting this to ``False`` is deprecated
+            and will be removed in two minor releases. See
+            [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+            for details.
 
     """
 
     def __init__(
         self,
         compile_spec: VgfCompileSpec,
-        use_composable_quantizer: bool = False,
+        use_composable_quantizer: bool = True,
     ) -> None:
         super().__init__(compile_spec, use_composable_quantizer)
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
index 190e8a57cd8..d4c2dfebdee 100644
--- a/backends/arm/quantizer/arm_quantizer_utils.py
+++ b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -243,6 +243,18 @@ class PatternQuantizer(Quantizer, QuantizerReporterUser):
 
     """
 
+    PARAMETER_TARGETS = {
+        torch.ops.aten.linear.default,
+        torch.ops.aten.convolution.default,
+        torch.ops.aten.conv1d.default,
+        torch.ops.aten.conv1d.padding,
+        torch.ops.aten.conv2d.default,
+        torch.ops.aten.conv2d.padding,
+        torch.ops.aten.conv3d.default,
+        torch.ops.aten.conv3d.padding,
+        torch.ops.aten.conv_transpose2d.input,
+    }
+
     def __init__(
         self,
         quantization_config: QuantizationConfig | None,
@@ -275,75 +287,59 @@ def get_quantizer_info(self):
             support_config_path,
         )
 
-    def is_parameter(self, node: Node, model: torch.fx.GraphModule) -> bool:
-        """Returns True if the given node is a parameter of the model."""
-        try:
-            _ = model.get_parameter(node.target)  # type: ignore[arg-type]
-            return True
-        except Exception:
-            return False
-
-    def is_weight(
-        self, node: Node, params: list[Node], model: torch.fx.GraphModule
-    ) -> bool:
-        """Returns True if node is the first parameter of the given
-        parameters.
-        """
-        return len(params) > 0 and node == params[0]
-
-    def is_bias(
-        self, node: Node, params: list[Node], model: torch.fx.GraphModule
-    ) -> bool:
-        """Returns True if node is the second parameter of the given
-        parameters.
-        """
-        return len(params) == 2 and node == params[1]
+    def _is_parameter_arg(self, node: Node, index: int) -> bool:
+        """Return True if all users take node as positional arg ``index``.
+
+        Only ``get_attr`` nodes qualify, and every user must target an op in
+        ``PARAMETER_TARGETS``. A ``get_attr`` node without users qualifies.
+
+        """
+        if node.op != "get_attr":
+            return False
+        return all(
+            user.target in self.PARAMETER_TARGETS
+            and len(user.args) > index
+            and node == user.args[index]
+            for user in node.users
+        )
+
+    def is_weight(self, node: Node) -> bool:
+        """Returns True if node is used as a weight by all users.
+
+        A weight is expected at positional argument 1 of the user op.
+
+        """
+        return self._is_parameter_arg(node, 1)
+
+    def is_bias(self, node: Node) -> bool:
+        """Returns True if node is used as a bias by all users.
+
+        A bias is expected at positional argument 2 of the user op.
+
+        """
+        return self._is_parameter_arg(node, 2)
 
     def annotate_match(
         self,
         match: list[Node],
         config: QuantizationConfig | None,
-        model: torch.fx.GraphModule,
     ) -> None:
         """Annotates a matched pattern according to the given quantization
         config.
         """
-        parameter_targets = {
-            torch.ops.aten.linear.default,
-            torch.ops.aten.convolution.default,
-            torch.ops.aten.conv1d.default,
-            torch.ops.aten.conv1d.padding,
-            torch.ops.aten.conv2d.default,
-            torch.ops.aten.conv2d.padding,
-            torch.ops.aten.conv3d.default,
-            torch.ops.aten.conv3d.padding,
-            torch.ops.aten.conv_transpose2d.input,
-        }
 
         for node in match:
             input_qspec_map = {}
             output_qspec = None
 
-            params = [n for n in node.all_input_nodes if self.is_parameter(n, model)]
-            if node.target in parameter_targets:
-                if len(params) == 0 or len(params) > 2:
-                    logger.warning(
-                        f"{node.name} is expected to have parameter tensors for weight/bias but no such inputs found, which may cause unexpected quantization annotations. This is likely caused by incorrect tensor instantiations or non-constant weight/biases."
-                    )
-            else:
-                if len(params) > 0:
-                    logger.warning(
-                        f"{node.name} is not expected to not have parameter tensors but found {[n.name for n in params]}, which may cause unexpected quantization annotations."
-                    )
-
             for input_node in node.all_input_nodes:
                 if not has_float_output(input_node):
                     continue
-                if self.is_weight(input_node, params, model):
+                if self.is_weight(input_node):
                     input_qspec_map[input_node] = (
                         config.get_weight_qspec(node) if config else None
                     )
-                elif self.is_bias(input_node, params, model):
+                elif self.is_bias(input_node):
                     input_qspec_map[input_node] = (
                         config.get_bias_qspec(node) if config else None  # type: ignore[assignment]
                     )
@@ -370,7 +366,7 @@ def annotate(self, model: torch.fx.GraphModule) -> None:  # type: ignore[overrid
         )
         for result in matches:
             if result.accepted:
-                self.annotate_match(result.pattern, self.quantization_config, model)
+                self.annotate_match(result.pattern, self.quantization_config)
                 self.report_accept(result.pattern)
             else:
                 self.report_reject(
@@ -424,6 +420,9 @@ class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
         torch.ops.aten.flip.default,
         torch.ops.aten.index_select.default,
         torch.ops.aten.index_put.default,
+        torch.ops.aten.index_put_.default,
+        torch.ops.aten.index_copy.default,
+        torch.ops.aten.index_copy_.default,
         torch.ops.aten.contiguous.default,
         torch.ops.aten.as_strided_copy.default,
         torch.ops.aten.pixel_shuffle.default,
@@ -571,6 +570,42 @@ def _get_shared_clique(self, root_node: Node) -> tuple[set[Node], list[Any]]:
 
         return shared_nodes, adjacent_qspecs
 
+    def _should_skip_while_shared_qspec(self, node: Node) -> bool:
+        """Return True for while_loop nodes with truthy ``additional_inputs`` meta."""
+        is_while_loop = node.target == torch.ops.higher_order.while_loop
+        return is_while_loop and bool(node.meta.get("additional_inputs"))
+
+    def _annotate_while_with_additional_inputs(
+        self,
+        root_node: Node,
+        adjacent_qspecs: list[Any],
+    ) -> bool:
+        """Annotate a while_loop with additional inputs using one adjacent qspec.
+
+        Returns True if ``root_node`` was accepted or rejected here, else False.
+
+        """
+        if not self._should_skip_while_shared_qspec(root_node):
+            return False
+        if not adjacent_qspecs:
+            self.report_reject(
+                [root_node],
+                "Couldn't find any adjacent quantization spec to annotate while_loop.",
+            )
+            return True
+
+        qspec = adjacent_qspecs[0]
+        input_qspec_map: dict[Node, Optional[QuantizationSpec]] = dict.fromkeys(
+            self._get_input_nodes_with_float_output(root_node), qspec
+        )
+        has_float_user = len(self._get_user_nodes_with_float_input(root_node)) > 0
+        output_qspec: Optional[QuantizationSpec] = qspec if has_float_user else None
+        _mark_node_as_quantized(
+            root_node, input_qspec_map, output_qspec, is_quantized=True
+        )
+        self.report_accept([root_node])
+        return True
+
     def _annotate_shared_cluster(self, root_node: Node) -> None:
         if (
             len(self._get_input_nodes_with_float_output(root_node)) == 0
@@ -592,9 +627,11 @@ def _annotate_shared_cluster(self, root_node: Node) -> None:
         node_order = {node: index for index, node in enumerate(root_node.graph.nodes)}
         ordered_nodes = sorted(shared_nodes, key=lambda node: node_order.get(node, 0))
 
+        if self._annotate_while_with_additional_inputs(root_node, adjacent_qspecs):
+            return
+
         # Ensure the root node is the first one in the graph.
         root_node = ordered_nodes[0]
-
         if len(adjacent_qspecs) > 0:
             root_node_float_inputs = self._get_input_nodes_with_float_output(root_node)
             if len(root_node_float_inputs) > 0:
diff --git a/backends/arm/quantizer/quantization_config.py b/backends/arm/quantizer/quantization_config.py
index d06203cede3..0c64d147c84 100644
--- a/backends/arm/quantizer/quantization_config.py
+++ b/backends/arm/quantizer/quantization_config.py
@@ -21,6 +21,7 @@
 
 from torchao.quantization.pt2e.quantizer import (
     DerivedQuantizationSpec,
+    FixedQParamsQuantizationSpec,
     QuantizationSpec,
     QuantizationSpecBase,
     SharedQuantizationSpec,
@@ -284,10 +285,18 @@ def get_input_act_qspec(self, node=None, input_node=None):
 
         For comparison operators, make sure that both inputs share the same
         quantization spec, by returning a SharedQuantizationSpec that ties the
-        quantization of both inputs together. For other operators, return the
-        default input activation spec.
+        quantization of both inputs together.
+
+        For trigonometric ops, ensure that input spec has fixed qparams.
+
+        For other operators, return the default input activation spec.
 
         """
+        # MLETORCH-1853: Fix lazy import when moving files around
+        from executorch.backends.arm.quantizer.quantization_annotator import (
+            _fixed_input_qspec_ops,
+        )
+
         if node is None or input_node is None:
             return super().get_input_act_qspec(node, input_node)
 
@@ -296,6 +305,29 @@ def get_input_act_qspec(self, node=None, input_node=None):
                 return super().get_input_act_qspec(node, input_node)
             else:
                 return SharedQuantizationSpec((node.args[0], node))
+        elif node.target in _fixed_input_qspec_ops:
+
+            input_act_qspec = super().get_input_act_qspec(node, input_node)
+            if not hasattr(input_act_qspec, "dtype") or not isinstance(
+                input_act_qspec.dtype, torch.dtype
+            ):
+                raise ValueError(
+                    f"{node.target} requires an input activation quantization "
+                    "spec to use fixed input qparams."
+                )
+            dtype = getattr(input_act_qspec, "dtype", None)
+            num_bits = torch.iinfo(dtype).bits
+
+            qparams = _fixed_input_qspec_ops[node.target][num_bits]
+            return FixedQParamsQuantizationSpec(
+                dtype=dtype,
+                scale=qparams.scale,
+                zero_point=qparams.zero_point,
+                quant_min=input_act_qspec.quant_min,
+                quant_max=input_act_qspec.quant_max,
+                qscheme=input_act_qspec.qscheme,
+                is_dynamic=input_act_qspec.is_dynamic,
+            )
 
         return super().get_input_act_qspec(node, input_node)
 
diff --git a/backends/arm/quantizer/quantizer_support.py b/backends/arm/quantizer/quantizer_support.py
index bb3ea158fba..d6a725c2b06 100644
--- a/backends/arm/quantizer/quantizer_support.py
+++ b/backends/arm/quantizer/quantizer_support.py
@@ -77,8 +77,6 @@ def check_pattern(cls, pattern):
     torch.ops.aten.relu_.default,
     torch.ops.aten.hardtanh.default,
     torch.ops.aten.hardtanh_.default,
-    torch.ops.aten.hardsigmoid.default,
-    torch.ops.aten.hardsigmoid_.default,
     torch.ops.aten.clamp.default,
     torch.ops.aten.clamp_.default,
 ]
@@ -168,6 +166,14 @@ def check_pattern(cls, pattern):
         (torch.ops.aten.ge.Scalar,),
         (torch.ops.aten.eq.Scalar,),
         (torch.ops.aten.ne.Scalar,),
+        (torch.ops.aten.lstm.input,),
+        (torch.ops.aten.rnn_tanh.input,),
+        (torch.ops.aten.rnn_relu.input,),
+        (torch.ops.aten.gru.input,),
+        (torch.ops.aten.asin.default,),
+        (torch.ops.aten.acos.default,),
+        (torch.ops.aten.atanh.default,),
+        (torch.ops.aten.einsum.default,),
     ]
 )
 TOSA_QUANTIZER_SUPPORT_DICT: dict[tuple[OpOverload, ...], type[PatternCheck] | None] = {
diff --git a/backends/arm/scripts/docgen/docgen.py b/backends/arm/scripts/docgen/docgen.py
index 75baf3e8e40..c0b708bdb5e 100644
--- a/backends/arm/scripts/docgen/docgen.py
+++ b/backends/arm/scripts/docgen/docgen.py
@@ -46,7 +46,9 @@ def get_docstring(obj) -> str:
 
     lines = docstring.split("\n")
     for line in lines:
-        if ":" in line and line.startswith(" "):
+        # Only first-level arg lines should become bullets.
+        is_arg_line = line.startswith("    ") and not line.startswith("        ")
+        if ":" in line and is_arg_line:
             new_line = line.strip()
             pos = new_line.index(":")
             new_line = f"- **{new_line[:pos]}**" + new_line[pos:]
diff --git a/backends/arm/test/misc/test_quant_custom_meta.py b/backends/arm/test/misc/test_quant_custom_meta.py
index cd9964f4511..f64b8067098 100644
--- a/backends/arm/test/misc/test_quant_custom_meta.py
+++ b/backends/arm/test/misc/test_quant_custom_meta.py
@@ -105,5 +105,6 @@ def test_quantized_to_float_transition_tosa_INT_FP(fp_extension: bool):
         )
     pipeline.quantizer.set_module_type(torch.nn.Sigmoid, None)  # type: ignore
     pipeline.quantizer.set_module_type(torch.nn.Conv1d, None)  # type: ignore
+    pipeline.quantizer.set_io(None)  # type: ignore
 
     pipeline.run()
diff --git a/backends/arm/test/misc/test_shared_qspecs.py b/backends/arm/test/misc/test_shared_qspecs.py
index de07bd5f6c2..93129633418 100644
--- a/backends/arm/test/misc/test_shared_qspecs.py
+++ b/backends/arm/test/misc/test_shared_qspecs.py
@@ -87,8 +87,8 @@ class SharedQspecMulipleClusters(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 8},
         "aten.add.Tensor": {_INT8_QSPEC: 2},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 2,
@@ -122,8 +122,8 @@ class SharedQspecInputForkNonShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 3,
@@ -149,8 +149,8 @@ class SharedQspecInputForkShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 5},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -178,8 +178,8 @@ class SharedQspecInputForkXShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -206,8 +206,8 @@ class SharedQspecInputForkYShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 5},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, -64, -128, 127, torch.int8): 2,
@@ -234,8 +234,8 @@ class SharedQspecInputForkXConstant(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 2},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 3},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 2,
@@ -260,8 +260,8 @@ class SharedQspecInputForkYConstant(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 2},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 3},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 1,
@@ -287,8 +287,8 @@ class SharedQspecOutputForkNonShared(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 2}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 3,
@@ -315,8 +315,8 @@ class SharedQspecOutputForkShared(torch.nn.Module):
         "quantized_decomposed.quantize_per_tensor.default": {None: 4},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 6},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 3}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.015678614, 0, -128, 127, torch.int8): 6,
@@ -341,10 +341,10 @@ class SharedQspecManyForks(torch.nn.Module):
     qspecs = {
         "quantized_decomposed.quantize_per_tensor.default": {None: 6},
         "quantized_decomposed.dequantize_per_tensor.default": {None: 9},
-        "aten.t.default": {None: 1},
+        "aten.t.default": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.086232387, 104, -128, 127, torch.int8): 9,
@@ -372,8 +372,8 @@ class SharedQspecSurroundedQuantizedOp(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.509554982, 123, -128, 127, torch.int8): 3,
@@ -403,8 +403,8 @@ class SharedQspecSurroundedQuantizedOpConstant(torch.nn.Module):
         "aten.ones.default": {_INT8_QSPEC: 1},
         "aten.add.Tensor": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
             (0.003921569, -128, -128, 127, torch.int8): 1,
@@ -429,18 +429,22 @@ class SharedQspecSub(torch.nn.Module):
     """A shared qspec node with float input."""
 
     qspecs = {
-        "quantized_decomposed.quantize_per_tensor.default": {None: 2},
-        "quantized_decomposed.dequantize_per_tensor.default": {None: 2},
+        "quantized_decomposed.quantize_per_tensor.default": {None: 4},
+        "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.sub.Tensor": {None: 1},
     }
-    inputs_qspecs = {None: 2}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 2}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
+            (0.003919654, -128, -128, 127, torch.int8): 1,
             (0.035276882, -128, -128, 127, torch.int8): 2,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
+            (0.003919654, -128, -128, 127, torch.int8): 1,
             (0.035276882, -128, -128, 127, torch.int8): 2,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
     }
 
@@ -462,8 +466,8 @@ class SharedQspecCompetingQspecs(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 4},
         "aten.conv2d.default": {_INT8_QSPEC: 1},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_channel.default": {
             (0, -2147483647, 2147483647, torch.int32): 1,
@@ -502,20 +506,16 @@ class SharedQspecNoQspecs(torch.nn.Module):
         "quantized_decomposed.dequantize_per_tensor.default": {None: 2},
         "aten.sub.Tensor": {None: 2},
     }
-    inputs_qspecs = {None: 1}
-    outputs_qspecs = {None: 1}
+    inputs_qspecs = {_INT8_QSPEC: 1}
+    outputs_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (
-                1.5259e-05,
-                -128,
-                -128,
-                127,
-                torch.int8,
-            ): 2,  # The network always has 0 output -> very small scale.
+            (1.5259e-05, -128, -128, 127, torch.int8): 1,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
         "quantized_decomposed.quantize_per_tensor.default": {
-            (1.5259e-05, -128, -128, 127, torch.int8): 2,
+            (1.5259e-05, -128, -128, 127, torch.int8): 1,
+            (0.03919654, -128, -128, 127, torch.int8): 1,
         },
     }
 
@@ -542,21 +542,19 @@ class MixedMaximumInt8Int16(torch.nn.Module):
     """A shared qspec node with int16/int8 inputs."""
 
     qspecs = {
-        "quantized_decomposed.quantize_per_tensor.default": {None: 6},
-        "quantized_decomposed.dequantize_per_tensor.default": {None: 6},
+        "quantized_decomposed.quantize_per_tensor.default": {None: 4},
+        "quantized_decomposed.dequantize_per_tensor.default": {None: 5},
     }
-    input_qspecs = {None: 1}
-    output_qspecs = {None: 1}
+    input_qspecs = {_INT8_QSPEC: 1}
+    output_qspecs = {_INT8_QSPEC: 1}
     quant_params = {
         "quantized_decomposed.quantize_per_tensor.default": {
-            (0.007839307, -128, -128, 127, torch.int8): 2,
-            (0.015678614, 0, -128, 127, torch.int8): 2,
-            (0.000244141, 0, -32767, 32767, torch.int16): 2,
+            (0.007839307, -128, -128, 127, torch.int8): 1,
+            (0.015678614, 0, -128, 127, torch.int8): 3,
         },
         "quantized_decomposed.dequantize_per_tensor.default": {
-            (0.007839307, -128, -128, 127, torch.int8): 2,
-            (0.015678614, 0, -128, 127, torch.int8): 2,
-            (0.000244141, 0, -32767, 32767, torch.int16): 2,
+            (0.007839307, -128, -128, 127, torch.int8): 1,
+            (0.015678614, 0, -128, 127, torch.int8): 4,
         },
     }
 
diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py
index 6718fedea04..e0d910bd069 100644
--- a/backends/arm/test/ops/test_to_copy.py
+++ b/backends/arm/test/ops/test_to_copy.py
@@ -330,18 +330,14 @@ def test_to_vgf_quant(test_data: Tuple):
     ),
 }
 
-redundant_xfails_FP = {
+redundant_xfails = {
     "rand_int8_int8": "Tracing graph with quantized input is not supported.",
     "rand_int16_int16": "Tracing graph with quantized input is not supported.",
 }
 
-redundant_xfails_INT = redundant_xfails_FP | {
-    "rand_fp16_fp16": "FP16 is not supported",
-}
-
 
 @common.parametrize(
-    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_FP
+    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails
 )
 def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple):
     test_tensor, new_dtype = test_data()
@@ -356,7 +352,7 @@ def test_to_tosa_FP_REDUNDANT_CAST(test_data: Tuple):
 
 
 @common.parametrize(
-    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails_INT
+    "test_data", _TO_COPY_TEST_DATA_REDUNDANT_CAST, xfails=redundant_xfails
 )
 def test_to_tosa_INT_REDUNDANT_CAST(test_data: Tuple):
     test_tensor, new_dtype = test_data()
diff --git a/backends/arm/test/ops/test_transpose_conv2d.py b/backends/arm/test/ops/test_transpose_conv2d.py
index 1ab077841b6..a288cc3ebac 100644
--- a/backends/arm/test/ops/test_transpose_conv2d.py
+++ b/backends/arm/test/ops/test_transpose_conv2d.py
@@ -7,14 +7,14 @@
 
 import conftest
 import torch
-
-from executorch.backends.arm.quantizer import QuantizationConfig
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
     get_symmetric_a8w4_quantization_config,
     get_symmetric_quantization_config,
     TOSAQuantizer,
 )
+
+from executorch.backends.arm.quantizer.quantization_config import TOSAQuantizationConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -311,7 +311,7 @@ def test_conv_transpose2d_tosa_INT_qat_axis1_uses_non_fused_fake_quant(test_data
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -350,7 +350,7 @@ def test_conv_transpose2d_tosa_INT_grouped_qat_axis0_keeps_fused_fake_quant(test
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -389,7 +389,7 @@ def test_conv_transpose2d_tosa_INT_ptq_observer_updates_axis(test_data):
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
@@ -427,7 +427,7 @@ def test_conv_transpose2d_tosa_INT_qat_correct_qspec_wrong_ctor_axis(test_data):
         ),
     )
     quantizer.set_global(
-        QuantizationConfig(
+        TOSAQuantizationConfig(
             input_activation=activation_qspec,
             output_activation=activation_qspec,
             weight=weight_qspec,
diff --git a/backends/arm/test/ops/test_unary_combos.py b/backends/arm/test/ops/test_unary_combos.py
index bc4bb0b39d9..2ecd04b9c79 100644
--- a/backends/arm/test/ops/test_unary_combos.py
+++ b/backends/arm/test/ops/test_unary_combos.py
@@ -104,9 +104,7 @@ def test_add_tensor_tosa_INT_combos(model_cls):
 
 
 @common.XfailIfNoCorstone300
-@common.parametrize(
-    "model_cls", MODEL_DATA, xfails={"NegAdd": "Numerical failure. MLBEDSW-11581"}
-)
+@common.parametrize("model_cls", MODEL_DATA)
 def test_add_tensor_u55_INT_combos(model_cls):
     m, inputs, exir = _build(model_cls)
     p = EthosU55PipelineINT[Tensor1](
diff --git a/backends/arm/test/ops/test_while.py b/backends/arm/test/ops/test_while.py
index b5cab047a50..51b56661b50 100644
--- a/backends/arm/test/ops/test_while.py
+++ b/backends/arm/test/ops/test_while.py
@@ -8,6 +8,8 @@
 import torch
 import torch.fx
 
+from executorch.backends.arm.quantizer import get_symmetric_quantization_config
+from executorch.backends.arm.quantizer.arm_quantizer import _TOSAQuantizerV2
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.backends.arm.test.tester.test_pipeline import (
@@ -228,6 +230,28 @@ def test_while_loop_tosa_INT(case: Callable[[], Tuple[torch.nn.Module, Tuple]]):
     pipeline.run()
 
 
+def test_while_loop_tosa_INT_composable_large_threshold():
+    module, example_inputs = test_cases["large_threshold"]()
+    pipeline = TosaPipelineINT[tuple](
+        module,
+        example_inputs,
+        "torch.ops.higher_order.while_loop",
+        tosa_extensions=["cf"],
+    )
+    # Swap in an explicitly constructed _TOSAQuantizerV2 for the quantize stage.
+    composable_quantizer = _TOSAQuantizerV2(pipeline.tester.compile_spec)
+    composable_quantizer.set_global(get_symmetric_quantization_config())
+    pipeline.quantizer.quantizer = composable_quantizer
+    # Add a check_not stage for the while_loop higher-order op after lowering.
+    pipeline.add_stage_after(
+        "to_edge_transform_and_lower",
+        ArmTester.check_not,
+        pipeline.tester,
+        ["torch.ops.higher_order.while_loop"],
+    )
+    pipeline.run()
+
+
 @common.parametrize(
     "case",
     test_cases,
diff --git a/backends/cortex_m/test/misc/test_portable_int8.py b/backends/cortex_m/test/misc/test_portable_int8.py
index 4e3b5f41561..920b4200e60 100644
--- a/backends/cortex_m/test/misc/test_portable_int8.py
+++ b/backends/cortex_m/test/misc/test_portable_int8.py
@@ -301,6 +301,36 @@ def _quantize_and_export(
         (torch.randn(6), torch.randn(6)),
         torch.int64,
     ),
+    "index_put_": OpCase(
+        torch.ops.aten.index_put_.default,
+        _build_module(
+            lambda x, y: torch.ops.aten.index_put_.default(
+                x, (torch.tensor([1, 3]),), torch.tensor([1.0, 2.0]), False
+            )
+        ),
+        (torch.randn(6), torch.randn(6)),
+        torch.int64,
+    ),
+    "index_copy": OpCase(
+        torch.ops.aten.index_copy.default,
+        _build_module(
+            lambda x, y: torch.ops.aten.index_copy.default(
+                x, 0, torch.tensor([0, 2]), y
+            )
+        ),
+        (torch.randn(4, 5), torch.randn(2, 5)),
+        torch.int64,
+    ),
+    "index_copy_": OpCase(
+        torch.ops.aten.index_copy_.default,
+        _build_module(
+            lambda x, y: torch.ops.aten.index_copy_.default(
+                x, 0, torch.tensor([0, 2]), y
+            )
+        ),
+        (torch.randn(4, 5), torch.randn(2, 5)),
+        torch.int64,
+    ),
     "contiguous": OpCase(
         torch.ops.aten.contiguous.default,
         _build_module(lambda x, y: torch.ops.aten.contiguous.default(x)),
diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
index 68fe9d160aa..c2f7035c89c 100644
--- a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
+++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
@@ -16,18 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes:
 ### Quantization API
 
 ```python
-class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
+class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'
 ```
 Quantizer supported by the Arm Ethos-U backend.
 
 .. warning::
-    Setting ``use_composable_quantizer=True`` enables an experimental API
-    surface that may change without notice.
+    The composable quantizer is now the default implementation. Setting
+    ``use_composable_quantizer=False`` is deprecated and will be removed in
+    two minor releases.
 
 Args:
 - **compile_spec (EthosUCompileSpec)**: Backend compile specification for
         Ethos-U targets.
-- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+- **use_composable_quantizer (bool)**: Whether to use the composable
+        quantizer implementation. Setting this to ``False`` is deprecated
+        and will be removed in two minor releases. See
+        [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+        for details.
 
 ```python
 def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
diff --git a/docs/source/backends/arm-vgf/arm-vgf-quantization.md b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
index 49ba41f74e1..2dc5b5631e6 100644
--- a/docs/source/backends/arm-vgf/arm-vgf-quantization.md
+++ b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
@@ -35,18 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods.
 ### Quantization API
 
 ```python
-class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
+class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = True) -> 'None'
 ```
 Quantizer supported by the Arm Vgf backend.
 
 .. warning::
-    Setting ``use_composable_quantizer=True`` enables an experimental API
-    surface that may change without notice.
+    The composable quantizer is now the default implementation. Setting
+    ``use_composable_quantizer=False`` is deprecated and will be removed in
+    two minor releases.
 
 Args:
 - **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf
         targets.
-- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701" for details.
+- **use_composable_quantizer (bool)**: Whether to use the composable
+        quantizer implementation. Setting this to ``False`` is deprecated
+        and will be removed in two minor releases. See
+        [issue #17701](https://github.com/pytorch/executorch/issues/17701)
+        for details.
 
 ```python
 def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
diff --git a/examples/arm/quantizer_tutorial.ipynb b/examples/arm/quantizer_tutorial.ipynb
index 76979316002..25b99dbd4b5 100644
--- a/examples/arm/quantizer_tutorial.ipynb
+++ b/examples/arm/quantizer_tutorial.ipynb
@@ -16,13 +16,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# WIP: TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n",
+    "# TOSA/EthosU/VgfQuantizer composable quantizer tutorial\n",
     "\n",
     "This is an in-depth tutorial of the new `TOSA/EthosU/VgfQuantizer` API. While the `TOSAQuantizer` is used in the example, both the\n",
     "`EthosUQuantizer` and `VgfQuantizer` directly inherit from this base class. \n",
     "\n",
-    "Note that the main API and functionality remains largely the same to allow for a drop-in replacement, but the underlying framework is different - as will be explained. **Both the quantizer and this tutorial are currently experimental and may change without prior notice.** Refer to https://github.com/pytorch/executorch/issues/17701 for questions and feedback.\n",
-    "\n",
     "Before you begin:\n",
     "1. (In a clean virtual environment with a compatible Python version) Install executorch using `./install_executorch.sh`\n",
     "2. Install Arm TOSA dependencies using `examples/arm/setup.sh --disable-ethos-u-deps`\n",
diff --git a/examples/models/llama/tests/test_export_llama_lib.py b/examples/models/llama/tests/test_export_llama_lib.py
index f3dc403aa05..2e708479b4e 100644
--- a/examples/models/llama/tests/test_export_llama_lib.py
+++ b/examples/models/llama/tests/test_export_llama_lib.py
@@ -7,8 +7,6 @@
 
 import unittest
 
-import torch
-
 from executorch.devtools.backend_debug import get_delegation_info
 
 try:
@@ -117,8 +115,6 @@ def test_get_quantizer_and_quant_params_returns_vgf_quantizer(self):
         self.assertIsNone(quant_dtype)
         self.assertEqual(len(quantizers), 1)
         self.assertIsInstance(quantizers[0], VgfQuantizer)
-        self.assertIsNotNone(quantizers[0].global_config)
-        self.assertEqual(quantizers[0].module_type_config, {})
 
     @unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
     def test_get_quantizer_and_quant_params_returns_vgf_linear_quantizer(self):
@@ -134,8 +130,6 @@ def test_get_quantizer_and_quant_params_returns_vgf_linear_quantizer(self):
 
         self.assertEqual(len(quantizers), 1)
         self.assertIsInstance(quantizers[0], VgfQuantizer)
-        self.assertIsNone(quantizers[0].global_config)
-        self.assertIn(torch.nn.Linear, quantizers[0].module_type_config)
 
     @unittest.skipUnless(HAS_ARM_BACKEND, "ARM backend not available")
     def test_vgf_16a8w_requires_int16_compile_spec_extension(self):
@@ -162,4 +156,3 @@ def test_vgf_16a8w_accepts_int16_compile_spec_extension(self):
 
         self.assertEqual(len(quantizers), 1)
         self.assertIsInstance(quantizers[0], VgfQuantizer)
-        self.assertIn(torch.nn.Linear, quantizers[0].module_type_config)