examples/models/llama2/export_llama_lib.py (3 additions, 0 deletions)

@@ -424,6 +424,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LLMEdgeManager:
             verbose=args.verbose,
             max_seq_len=args.max_seq_length,
             metadata_str=args.metadata,
+            args=args,
         )
         .set_output_dir(output_dir_path)
         .to_dtype(dtype_override)
@@ -633,6 +634,7 @@ def _load_llama_model(
     verbose: bool = False,
     max_seq_len: int = 128,
     metadata_str: Optional[str] = None,
+    args,
 ) -> "LLMEdgeManager":
     """
     A helper util that builds a Llama2 model. It returns a LLMEdgeManager that
@@ -694,4 +696,5 @@ def _load_llama_model(
             model.params,
             metadata_str,
         ),
+        args=args,
     )
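To see why the whole parsed namespace is threaded through rather than a single flag, here is a minimal sketch of the pattern, assuming a hypothetical `ExportManager` in place of the real `LLMEdgeManager`: storing `args` once at construction lets any later export stage consult backend flags such as `qnn` without growing the signature again.

```python
# Minimal sketch; ExportManager is a hypothetical stand-in for LLMEdgeManager.
from argparse import Namespace
from typing import Any, Optional


class ExportManager:
    def __init__(self, args: Optional[Any] = None):
        self.args = args  # mirrors `self.args = args` in builder.py

    def wants_qnn(self) -> bool:
        # The same guard builder.py uses in capture_pre_autograd_graph().
        return bool(hasattr(self.args, "qnn") and self.args.qnn)


print(ExportManager(Namespace(qnn=True)).wants_qnn())  # True
print(ExportManager(None).wants_qnn())                 # False
```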
examples/models/llava/export_llava.py (2 additions, 0 deletions)

@@ -89,6 +89,7 @@ def forward(self, input_pos, embeddings):
         use_kv_cache=True,
         example_inputs=(torch.tensor([0], dtype=torch.int64), embeddings),
         dynamic_shapes=dynamic_shapes,
+        args=llava.text_model_args,
     )

     dtype_override = DType.fp32
@@ -145,6 +146,7 @@ def forward(self, images):
             use_kv_cache=True,
             example_inputs=(resized,),
             dynamic_shapes=dynamic_shapes,
+            args=None,
         )
         .capture_pre_autograd_graph()
         .pt2e_quantize([quantizer])
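Passing `args=None` for the image encoder is safe because the builder's QNN check short-circuits on `hasattr`; a two-line illustration of the guard under that assumption:

```python
args = None
assert not (hasattr(args, "qnn") and args.qnn)  # hasattr(None, "qnn") is False
```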
extension/llm/export/builder.py (18 additions, 7 deletions)

@@ -65,6 +65,7 @@ def __init__(
         dtype,
         use_kv_cache,
         example_inputs,
+        args: Optional[Any] = None,
         enable_dynamic_shape: bool = False,
         verbose: bool = False,
         metadata: Optional[dict] = None,
@@ -87,6 +88,7 @@
         self.output_dir = "."
         self.dynamic_shapes = dynamic_shapes
         self._saved_pte_filename = None
+        self.args = args

     def set_output_dir(self, output_dir: str) -> "LLMEdgeManager":
         """
@@ -162,9 +164,20 @@ def capture_pre_autograd_graph(self) -> "LLMEdgeManager":
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-            # pyre-fixme[8]
-            self.pre_autograd_graph_module = capture_pre_autograd_graph(
-                self.model, self.example_inputs, dynamic_shapes=dynamic_shape
-            )
+            if hasattr(self.args, "qnn") and self.args.qnn:
+                # TODO: this is temporary; export_for_training doesn't work with QNN either.
+                # We need a functional graph. See
+                # https://github.com/pytorch/executorch/pull/4627 for more details.
+                self.pre_autograd_graph_module = torch.export.export(
+                    self.model,
+                    self.example_inputs,
+                    dynamic_shapes=dynamic_shape,
+                    strict=True,
+                ).module()
+            else:
+                # pyre-fixme[8]
+                self.pre_autograd_graph_module = capture_pre_autograd_graph(
+                    self.model, self.example_inputs, dynamic_shapes=dynamic_shape
+                )

         return self

     def pt2e_quantize(self, quantizers: Optional[List[Quantizer]]) -> "LLMEdgeManager":
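The new QNN branch leans on strict `torch.export.export` to produce a functional ATen graph (the TODO above notes that a functional graph is required), then unwraps it with `.module()`. A self-contained sketch on a toy module, with hypothetical model and inputs:

```python
import torch


class ToyModel(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(x) * 2


model, example_inputs = ToyModel(), (torch.randn(2, 4),)

# Strict export traces into an ExportedProgram whose graph holds functional
# ATen ops only; .module() returns a GraphModule that later stages
# (quantization, partitioning) can keep transforming.
graph_module = torch.export.export(model, example_inputs, strict=True).module()
print(graph_module.graph)
```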
@@ -210,10 +223,8 @@ def export_to_edge(self) -> "LLMEdgeManager":
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
             if self.pre_autograd_graph_module is None:
-                # pyre-fixme[8]
-                self.pre_autograd_graph_module = capture_pre_autograd_graph(
-                    self.model, self.example_inputs, dynamic_shapes=dynamic_shape
-                )
+                # Run capture_pre_autograd_graph if it has not already run.
+                self.capture_pre_autograd_graph()
             self.edge_manager = export_to_edge(
                 self.pre_autograd_graph_module,  # pyre-fixme[6]
                 self.example_inputs,
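The `export_to_edge` hunk replaces a duplicated capture call with delegation, so the QNN-aware capture logic lives in exactly one place. A minimal sketch of the lazy, single-owner pattern, using a hypothetical `LazyManager`:

```python
# LazyManager is hypothetical; it only illustrates the delegation shape.
class LazyManager:
    def __init__(self) -> None:
        self.graph_module = None

    def capture(self) -> "LazyManager":
        # Stands in for capture_pre_autograd_graph(), QNN branch included.
        self.graph_module = "captured-graph"
        return self

    def export_to_edge(self) -> str:
        if self.graph_module is None:
            self.capture()  # one code path owns capture
        return f"edge({self.graph_module})"


print(LazyManager().export_to_edge())  # edge(captured-graph)
```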