Skip to content

Commit 944a9f6

Browse files
andyluo7 and seungrokj authored
Fix/qwen3 5 amd rope cutedsl fallback (#18753)
Co-authored-by: seungrokj <seungrok.jung@amd.com>
1 parent 91230dc commit 944a9f6

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

python/sglang/srt/configs/qwen3_5.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,18 @@ def __init__(
1717
self,
1818
**kwargs,
1919
):
20+
# HF Qwen3.5 checkpoints may provide RoPE settings under rope_parameters.
21+
# Normalize it before parent init so downstream code sees the expected values.
22+
rope_parameters = kwargs.pop("rope_parameters", None)
23+
if kwargs.get("rope_scaling") is None and rope_parameters is not None:
24+
kwargs["rope_scaling"] = rope_parameters
25+
2026
super().__init__(**kwargs)
2127
if self.rope_scaling is None:
22-
self.rope_scaling = {}
28+
self.rope_scaling = rope_parameters or {}
29+
30+
# Keep both names for compatibility with model code paths that read either.
31+
self.rope_parameters = rope_parameters or self.rope_scaling
2332

2433

2534
class Qwen3_5Config(PretrainedConfig):

python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,14 @@
4545

4646
if not is_cpu() and not is_npu():
4747
# fix import error on CPU device, no impacts when non-CPU path
48-
from sglang.jit_kernel.cutedsl_gdn import (
49-
cutedsl_fused_sigmoid_gating_delta_rule_update,
50-
)
48+
try:
49+
from sglang.jit_kernel.cutedsl_gdn import (
50+
cutedsl_fused_sigmoid_gating_delta_rule_update,
51+
)
52+
except ModuleNotFoundError:
53+
# CuTe DSL path requires cuda-python (cuda.bindings.*). Keep runtime usable
54+
# by falling back to non-CuTe kernels when it's unavailable.
55+
cutedsl_fused_sigmoid_gating_delta_rule_update = None
5156
from sglang.srt.layers.attention.fla.chunk import chunk_gated_delta_rule
5257
from sglang.srt.layers.attention.fla.chunk_delta_h import (
5358
CHUNK_SIZE as FLA_CHUNK_SIZE,
@@ -830,6 +835,12 @@ def __init__(self, model_runner: ModelRunner):
830835
), f"{self.conv_states_shape[-1]=} should be less than {FLA_CHUNK_SIZE}"
831836

832837
use_cutedsl = Envs.SGLANG_USE_CUTEDSL_GDN_DECODE.get()
838+
if use_cutedsl and cutedsl_fused_sigmoid_gating_delta_rule_update is None:
839+
rank0_log(
840+
"CuTe DSL GDN decode requested but unavailable "
841+
"(missing cuda.bindings). Falling back to FLA decode kernel."
842+
)
843+
use_cutedsl = False
833844
rank0_log(f"CuTe DSL GDN decode enabled: {use_cutedsl}")
834845
self._kernel_func = (
835846
cutedsl_fused_sigmoid_gating_delta_rule_update

0 commit comments

Comments
 (0)