From 455aac7e3e4940292130b112f2b40907344011dd Mon Sep 17 00:00:00 2001
From: Shushi Hong <820958424@qq.com>
Date: Mon, 1 Dec 2025 22:50:30 -0500
Subject: [PATCH 1/2] Fix output indexing in GPU execution

Schedule the module with DefaultGPUSchedule under the target before
compiling, and take element 0 of the VM result before calling .numpy().
---
 docs/how_to/tutorials/e2e_opt_model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/how_to/tutorials/e2e_opt_model.py b/docs/how_to/tutorials/e2e_opt_model.py
index 9f89e744a362..b9812516cb82 100644
--- a/docs/how_to/tutorials/e2e_opt_model.py
+++ b/docs/how_to/tutorials/e2e_opt_model.py
@@ -113,12 +113,14 @@
 # We skip this step in the CI environment.
 
 if not IS_IN_CI:
+    with target:
+        mod = tvm.tir.transform.DefaultGPUSchedule()(mod)
     ex = tvm.compile(mod, target="cuda")
     dev = tvm.device("cuda", 0)
     vm = relax.VirtualMachine(ex, dev)
     # Need to allocate data and params on GPU device
     gpu_data = tvm.runtime.tensor(np.random.rand(1, 3, 224, 224).astype("float32"), dev)
     gpu_params = [tvm.runtime.tensor(p, dev) for p in params["main"]]
-    gpu_out = vm["main"](gpu_data, *gpu_params).numpy()
+    gpu_out = vm["main"](gpu_data, *gpu_params)[0].numpy()
 
     print(gpu_out.shape)

From b93ca6e845366a6ae4ac47117e948bd84667b6c3 Mon Sep 17 00:00:00 2001
From: Shushi Hong <820958424@qq.com>
Date: Mon, 1 Dec 2025 23:00:43 -0500
Subject: [PATCH 2/2] Compile with the target variable instead of hard-coded "cuda"

---
 docs/how_to/tutorials/e2e_opt_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/how_to/tutorials/e2e_opt_model.py b/docs/how_to/tutorials/e2e_opt_model.py
index b9812516cb82..8307ddc4f299 100644
--- a/docs/how_to/tutorials/e2e_opt_model.py
+++ b/docs/how_to/tutorials/e2e_opt_model.py
@@ -115,7 +115,7 @@
 if not IS_IN_CI:
     with target:
         mod = tvm.tir.transform.DefaultGPUSchedule()(mod)
-    ex = tvm.compile(mod, target="cuda")
+    ex = tvm.compile(mod, target=target)
     dev = tvm.device("cuda", 0)
     vm = relax.VirtualMachine(ex, dev)
     # Need to allocate data and params on GPU device