[Diffusion] [NPU] Fix CI run (#18921)

Makcum888e · web-flow · commit 2aa0db7d9cfa · 2026-02-17T16:54:19.000+03:00
diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml
@@ -28,9 +28,9 @@ jobs:
   check-changes:
     runs-on: ubuntu-latest
     outputs:
-      changes_exist: ${{ steps.filter.outputs.main_package || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests}}
-      main_package: ${{ steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
-      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
+      changes_exist: ${{ steps.filter.outputs.main_package == 'true' || steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true'}}
+      main_package: ${{ steps.filter.outputs.main_package == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
+      multimodal_gen: ${{ steps.filter.outputs.multimodal_gen == 'true' || steps.run-mode.outputs.run_all_tests == 'true' }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/python/sglang/multimodal_gen/runtime/managers/gpu_worker.py b/python/sglang/multimodal_gen/runtime/managers/gpu_worker.py
@@ -154,8 +154,8 @@ def do_mem_analysis(self, output_batch: OutputBatch):
             output_batch.timings.record_memory_snapshot("mem_analysis", final_snapshot)
 
         # for details on max_memory_reserved: https://docs.pytorch.org/docs/stable/generated/torch.cuda.memory.max_memory_reserved.html
-        peak_reserved_bytes = torch.cuda.max_memory_reserved()
-        peak_allocated_bytes = torch.cuda.max_memory_allocated()
+        peak_reserved_bytes = torch.get_device_module().max_memory_reserved()
+        peak_allocated_bytes = torch.get_device_module().max_memory_allocated()
 
         output_batch.peak_memory_mb = peak_reserved_bytes / (1024**2)
         peak_reserved_gb = peak_reserved_bytes / (1024**3)
diff --git a/python/sglang/multimodal_gen/test/server/ascend/perf_baselines_npu.json b/python/sglang/multimodal_gen/test/server/ascend/perf_baselines_npu.json
@@ -17,9 +17,9 @@
                 "per_frame_generation": null
             },
             "denoise_step_ms": {
-                "0": 195.27,
-                "1": 329.05,
-                "2": 545.43,
+                "0": 364.97,
+                "1": 542.66,
+                "2": 542.1,
                 "3": 541.3,
                 "4": 537.07,
                 "5": 537.21,
diff --git a/python/sglang/multimodal_gen/test/server/ascend/testcase_configs_npu.py b/python/sglang/multimodal_gen/test/server/ascend/testcase_configs_npu.py
@@ -12,7 +12,7 @@
         DiffusionServerArgs(
             model_path="/root/.cache/modelscope/hub/models/Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
             modality="video",
-            warmup=0,
+            warmup=True,
             custom_validator="video",
         ),
         DiffusionSamplingParams(