Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions tests/integration/test_lists/waives.txt
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,6 @@ perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4
perf/test_perf_sanity.py::test_e2e[disagg_upload-e2e-gb200_kimi-k25-thinking-fp4_8k1k_con4096_ctx1_dep4_gen1_dep16_eplb0_mtp0_ccb-NIXL] SKIP (https://nvbugs/6110326)
perf/test_perf_sanity.py::test_e2e[aggr_upload-ctx_only-gb200_deepseek-v32-fp4_32k4k_con2048_ctx1_dep4_gen1_dep32_eplb288_mtp1_ccb-NIXL] SKIP (https://nvbugs/6110326)
perf/test_perf_sanity.py::test_e2e[aggr_upload-k25_thinking_fp4_2_nodes_grace_blackwell-k25_thinking_fp4_dep8_32k8k] SKIP (https://nvbugs/6110326)
unittest/_torch/visual_gen/multi_gpu/test_ulysses_sage_attention.py::TestSageUlyssesAttention::test_sage_ulysses_forward[False] SKIP (https://nvbugs/6111076)
unittest/_torch/visual_gen/multi_gpu/test_ulysses_sage_attention.py::TestSageUlyssesAttention::test_sage_ulysses_forward[True] SKIP (https://nvbugs/6111076)
unittest/_torch/visual_gen/multi_gpu/test_ulysses_sage_attention.py::TestSageUlyssesAttention::test_sage_ulysses_vs_reference[False-1] SKIP (https://nvbugs/6111076)
unittest/_torch/visual_gen/multi_gpu/test_ulysses_sage_attention.py::TestSageUlyssesAttention::test_sage_ulysses_vs_reference[True-16] SKIP (https://nvbugs/6111076)
unittest/_torch/visual_gen/multi_gpu/test_ulysses_sage_attention.py::TestSageUlyssesAttention::test_sage_ulysses_vs_reference[True-4] SKIP (https://nvbugs/6111076)
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[v2_kv_cache-trtllm-one_model-overlap_scheduler] SKIP (https://nvbugs/6113016)
disaggregated/test_disaggregated.py::test_disaggregated_gpt_oss_120b_harmony[gpt_oss/gpt-oss-120b] SKIP (https://nvbugs/6011317)
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[v2_kv_cache-dp4-cutlass-auto] SKIP (https://nvbugs/5596343)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import functools
import os
import threading

os.environ["TLLM_DISABLE_MPI"] = "1"

Expand All @@ -38,9 +39,12 @@
try:
from tensorrt_llm._torch.visual_gen.attention_backend import UlyssesAttention
from tensorrt_llm._torch.visual_gen.attention_backend.trtllm import TrtllmAttention
from tensorrt_llm._torch.visual_gen.config import create_attention_metadata_state
from tensorrt_llm._utils import get_free_port

MODULES_AVAILABLE = True
ATTENTION_META_DICT = threading.local()
ATTENTION_META_DICT.metadata = create_attention_metadata_state()
except ImportError:
MODULES_AVAILABLE = False

Expand Down Expand Up @@ -133,6 +137,7 @@ def _logic_sage_ulysses_forward(rank, world_size, *, sage_attn_qk_int8: bool):
sage_attn_num_elts_per_blk_k=blk_k,
sage_attn_num_elts_per_blk_v=1,
sage_attn_qk_int8=sage_attn_qk_int8,
attention_metadata_state=ATTENTION_META_DICT.metadata,
)
attention = UlyssesAttention(inner_backend=inner, process_group=None)

Expand Down Expand Up @@ -189,6 +194,7 @@ def _logic_sage_ulysses_vs_reference(
sage_attn_num_elts_per_blk_k=sage_attn_num_elts_per_blk_k,
sage_attn_num_elts_per_blk_v=1,
sage_attn_qk_int8=sage_attn_qk_int8,
attention_metadata_state=ATTENTION_META_DICT.metadata,
)
attention = UlyssesAttention(inner_backend=inner, process_group=None)

Expand Down
Loading