We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 58dd5f3, commit 5352aa1 (Copy full SHA for 5352aa1)
python/sglang/srt/models/qwen2_moe.py
@@ -324,6 +324,10 @@ def forward(
324
final_hidden_states = self._forward_router_experts(hidden_states)
325
326
if shared_output is not None:
327
+ # In-place add is required to keep final_hidden_states in the
328
+ # symmetric memory pool (when --enable-symm-mem is used).
329
+ # An out-of-place add would allocate a new tensor outside symm
330
+ # memory, breaking subsequent symmetric collective operations.
331
final_hidden_states += shared_output
332
if self.tp_size > 1 and not use_reduce_scatter:
333
final_hidden_states = tensor_model_parallel_all_reduce(final_hidden_states)
0 commit comments