Skip to content

Commit dc1309f

Browse files
authored
Avoid kimi linear stream sync (sgl-project#16186)
Signed-off-by: vincentzed <207368749+vincentzed@users.noreply.github.com>
1 parent 539bbf4 commit dc1309f

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -535,7 +535,7 @@ def _replay_metadata(
535 535       self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(
536 536           self.cached_cuda_graph_decode_query_start_loc[: bs - num_padding]
537 537       )
538     -     self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(
    538 +     self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(
539 539           bs - num_padding
540 540       )
541 541   elif forward_mode.is_target_verify():
@@ -547,7 +547,7 @@ def _replay_metadata(
547 547       self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(
548 548           self.cached_cuda_graph_verify_query_start_loc[: bs - num_padding]
549 549       )
550     -     self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(
    550 +     self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(
551 551           (bs - num_padding) * spec_info.draft_token_num
552 552       )
553 553   else:

0 commit comments

Comments
 (0)