Skip to content

Commit 7d0de21

Browse files
kuafouyizhang2077
authored and committed
[bugfix] fix mamba slot leak when scheduling fails with radix cache (sgl-project#15840) (sgl-project#16067)
Co-authored-by: yizhang2077 <1109276519@qq.com>
1 parent b873721 commit 7d0de21

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

python/sglang/srt/managers/scheduler.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2095,6 +2095,8 @@ def _get_new_batch_prefill_raw(
20952095
running_loras.add(req.lora_id)
20962096

20972097
if res != AddReqResult.CONTINUE:
2098+
self.maybe_release_mamba_cache(req)
2099+
20982100
if res == AddReqResult.NO_TOKEN:
20992101
if self.enable_hierarchical_cache:
21002102
# Set batch_is_full after making sure there are requests that can be served
@@ -2192,6 +2194,25 @@ def _get_new_batch_prefill_raw(
21922194

21932195
return new_batch
21942196

2197+
def maybe_release_mamba_cache(self, req: Req) -> None:
    """Release mamba slot if allocated via COW but scheduling failed.

    Without this, the slot remains held by a waiting request, causing
    check_memory() to detect a "memory leak" and crash the server.
    The next schedule round will re-allocate safely via match_prefix().

    Note: In disaggregation DECODE mode, mamba state is transferred from PREFILL and
    is not recoverable if freed, so we do not free it here. To avoid false-positive
    leak checks in this situation, self_check_during_idle skips memory checking when
    the waiting queue is not empty.
    """
    # Nothing to release if no mamba slot was ever allocated for this request.
    if req.mamba_pool_idx is None:
        return
    # DECODE-side disaggregation: the mamba state was transferred from the
    # PREFILL worker and cannot be rebuilt, so the slot must be kept.
    if self.disaggregation_mode == DisaggregationMode.DECODE:
        return
    # free() expects a batched index tensor, hence the unsqueeze.
    self.req_to_token_pool.mamba_pool.free(req.mamba_pool_idx.unsqueeze(-1))
    req.mamba_pool_idx = None
21952216
def update_running_batch(self, batch: ScheduleBatch) -> Optional[ScheduleBatch]:
21962217
"""Update the current running decoding batch."""
21972218
initial_bs = batch.batch_size()

0 commit comments

Comments
 (0)