Avoid Kimi Linear stream sync (sgl-project#16186)

vincentzed · web-flow · commit dc1309fc7e4a · 2026-02-12T09:27:22.000+08:00
Signed-off-by: vincentzed <207368749+vincentzed@users.noreply.github.com>
diff --git a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
@@ -535,7 +535,7 @@ def _replay_metadata(
                 self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(
                     self.cached_cuda_graph_decode_query_start_loc[: bs - num_padding]
                 )
-                self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(
+                self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(
                     bs - num_padding
                 )
         elif forward_mode.is_target_verify():
@@ -547,7 +547,7 @@ def _replay_metadata(
                 self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(
                     self.cached_cuda_graph_verify_query_start_loc[: bs - num_padding]
                 )
-                self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(
+                self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(
                     (bs - num_padding) * spec_info.draft_token_num
                 )
         else:

Original file line number	Diff line number	Diff line change
`@@ -535,7 +535,7 @@ def _replay_metadata(`
`535`	`535`	`self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(`
`536`	`536`	`self.cached_cuda_graph_decode_query_start_loc[: bs - num_padding]`
`537`	`537`	`)`
`538`		`- self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(`
	`538`	`+ self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(`
`539`	`539`	`bs - num_padding`
`540`	`540`	`)`
`541`	`541`	`elif forward_mode.is_target_verify():`
`@@ -547,7 +547,7 @@ def _replay_metadata(`
`547`	`547`	`self.query_start_loc_list[bs - 1][: bs - num_padding].copy_(`
`548`	`548`	`self.cached_cuda_graph_verify_query_start_loc[: bs - num_padding]`
`549`	`549`	`)`
`550`		`- self.query_start_loc_list[bs - 1][bs - num_padding :].copy_(`
	`550`	`+ self.query_start_loc_list[bs - 1][bs - num_padding :].fill_(`
`551`	`551`	`(bs - num_padding) * spec_info.draft_token_num`
`552`	`552`	`)`
`553`	`553`	`else:`