Fix lint; compute deep_gemm_num_sms only when running on CUDA

sgl-project · HaiShaw · Feb 20, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
commit aa84e36b8a10a5dc5378707f4a66f4b0be32bd36
diff --git a/python/sglang/srt/batch_overlap/operations_strategy.py b/python/sglang/srt/batch_overlap/operations_strategy.py
@@ -7,6 +7,9 @@
 from sglang.srt.batch_overlap.operations import Operation
 from sglang.srt.layers.moe.token_dispatcher import DeepEPConfig
 from sglang.srt.model_executor.forward_batch_info import ForwardMode
+from sglang.srt.utils import is_cuda
+
+_is_cuda = is_cuda()
 
 
 @dataclass
@@ -91,9 +94,9 @@ def _compute_moe_deepseek_layer_operations_strategy_tbo(
 def _compute_moe_deepseek_blog_prefill(layer):
     device_properties = torch.cuda.get_device_properties(device="cuda")
     total_num_sms = device_properties.multi_processor_count
-    # deep_gemm_num_sms = total_num_sms - DeepEPConfig.get_instance().num_sms
-    ##TODO(billishyahao): fixme 
     deep_gemm_num_sms = None
+    if _is_cuda:
+        deep_gemm_num_sms = total_num_sms - DeepEPConfig.get_instance().num_sms
 
     return OperationsStrategy(
         deep_gemm_num_sms=deep_gemm_num_sms,
@@ -170,7 +173,9 @@ def _compute_moe_qwen3_layer_operations_strategy_tbo(
 def _compute_moe_qwen3_prefill(layer):
     device_properties = torch.cuda.get_device_properties(device="cuda")
     total_num_sms = device_properties.multi_processor_count
-    deep_gemm_num_sms = total_num_sms - DeepEPConfig.get_instance().num_sms
+    deep_gemm_num_sms = None
+    if _is_cuda:
+        deep_gemm_num_sms = total_num_sms - DeepEPConfig.get_instance().num_sms
 
     return OperationsStrategy(
         deep_gemm_num_sms=deep_gemm_num_sms,

diff --git a/python/sglang/srt/batch_overlap/two_batch_overlap.py b/python/sglang/srt/batch_overlap/two_batch_overlap.py
@@ -30,7 +30,7 @@
 from sglang.srt.layers.moe.token_dispatcher import (
     DeepEPDispatcher,
     MooncakeEPDispatcher,
-    MoriEPDispatcher
+    MoriEPDispatcher,
 )
 from sglang.srt.layers.moe.token_dispatcher.base import BaseDispatcher
 from sglang.srt.managers.schedule_batch import ScheduleBatch

@@ -24,14 +24,11 @@
     DeepEPLLCombineInput,
     DeepEPNormalCombineInput,
 )
-from sglang.srt.layers.moe.token_dispatcher.moriep import MoriEPNormalCombineInput
-from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker
 from sglang.srt.layers.moe.token_dispatcher.moriep import (
     MoriEPLLCombineInput,
     MoriEPNormalCombineInput,
 )
 from sglang.srt.layers.moe.topk import TopKOutput, TopKOutputChecker
-
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.quantization.compressed_tensors.compressed_tensors_moe import (
     NPUCompressedTensorsW4A16Int4DynamicMoEMethod,
@@ -46,8 +43,6 @@
     from sglang.srt.layers.moe.token_dispatcher import (
         DeepEPLLDispatchOutput,
         DeepEPNormalDispatchOutput,
-        MoriEPNormalDispatchOutput,
-        MoriEPLLDispatchOutput,
         DispatchOutput,
     )
 
@@ -163,7 +158,6 @@ def __init__(
             # the last one is invalid rank_id
             self.expert_mask[:-1] = 1
 
-
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -256,12 +250,12 @@ def run_moe_core(
             if DispatchOutputChecker.format_is_deepep_normal(dispatch_output)
             else DeepEPLLCombineInput
         )
-        
+
         return combine_input_wrapper(
-                hidden_states=output,
-                topk_ids=dispatch_output.topk_ids,
-                topk_weights=dispatch_output.topk_weights,
-            )
+            hidden_states=output,
+            topk_ids=dispatch_output.topk_ids,
+            topk_weights=dispatch_output.topk_weights,
+        )
 
     def combine(
         self,
@@ -606,7 +600,7 @@ def forward(
         hidden_states: torch.Tensor,
         topk_output: TopKOutput,
     ):
-        num_token = hidden_states.shape[0] 
+        num_token = hidden_states.shape[0]
         dispatch_output = self.dispatcher.dispatch(
             hidden_states=hidden_states, topk_output=topk_output
         )
@@ -617,13 +611,12 @@ def forward(
 
         return hidden_states[:num_token]
 
-
     def run_moe_core(
         self,
         dispatch_output: DispatchOutput,
     ):
-        #TODO(billishyahao): check aiter path
-        #billishyahao: for now, fused_moe only support torch.bfloat16
+        # TODO(billishyahao): check aiter path
+        # billishyahao: for now, fused_moe only supports torch.bfloat16
         output_dtype = torch.bfloat16
         scale = None
         is_fp8_quant = isinstance(self.quant_method, Fp8MoEMethod)
@@ -636,15 +629,15 @@ def run_moe_core(
             dispatch_weights,
             dispatch_recv_token_num,
             origin_topk_ids,
-            origin_topk_weights
+            origin_topk_weights,
         ) = (
             dispatch_output.hidden_states,
             dispatch_output.hidden_states_scale,
             dispatch_output.topk_ids,
             dispatch_output.topk_weights,
             dispatch_output.num_recv_tokens_per_expert,
             dispatch_output.origin_topk_ids,
-            dispatch_output.origin_topk_weights
+            dispatch_output.origin_topk_weights,
         )
 
         w13_weight = self.w13_weight
@@ -717,8 +710,6 @@ def run_moe_core(
         )
 
 
-
-
 def get_moe_impl_class(quant_config: Optional[QuantizationConfig]):
     # [TODO] kk, temporary solution
     if get_moe_a2a_backend().is_mori():

@@ -98,7 +98,11 @@ def create_moe_dispatcher(moe_runner_config: MoeRunnerConfig) -> BaseDispatcher:
         return StandardDispatcher(moe_runner_config)
     elif a2a_backend.is_deepep() or a2a_backend.is_mooncake() or a2a_backend.is_mori():
         return MaybeTboDeepEPDispatcher(
-            group=get_tp_group().device_group if not a2a_backend.is_mori() else get_tp_group(),
+            group=(
+                get_tp_group().device_group
+                if not a2a_backend.is_mori()
+                else get_tp_group()
+            ),
             router_topk=moe_runner_config.top_k,
             permute_fusion=True,
             num_experts=moe_runner_config.num_experts,

@@ -28,10 +28,10 @@
 )
 from sglang.srt.layers.moe.token_dispatcher.moriep import (
     MoriEPDispatcher,
+    MoriEPLLCombineInput,
+    MoriEPLLDispatchOutput,
     MoriEPNormalCombineInput,
     MoriEPNormalDispatchOutput,
-    MoriEPLLDispatchOutput,
-    MoriEPLLCombineInput,
 )
 from sglang.srt.layers.moe.token_dispatcher.standard import (
     StandardCombineInput,