Skip to content

Commit f1824a9

Browse files
authored
[EPD][refactor]: introduce BaseMMReceiver for gRPC transport integration (#17921)
1 parent ab8b99e commit f1824a9

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

python/sglang/srt/disaggregation/encode_receiver.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import random
55
import threading
66
import uuid
7+
from abc import ABC, abstractmethod
78
from enum import IntEnum
89
from typing import TYPE_CHECKING, List, Optional
910

@@ -241,7 +242,33 @@ def _determine_tensor_transport_mode(server_args):
241242
return "cuda_ipc"
242243

243244

244-
class MMReceiver:
245+
class MMReceiverBase(ABC):
246+
def __init__(
247+
self,
248+
server_args: ServerArgs,
249+
dtype: Optional[torch.dtype] = None,
250+
hf_config: Optional[PretrainedConfig] = None,
251+
pp_rank: Optional[int] = None,
252+
tp_rank: Optional[int] = None,
253+
tp_group: Optional[GroupCoordinator] = None,
254+
scheduler: Optional["Scheduler"] = None,
255+
):
256+
pass
257+
258+
@abstractmethod
259+
def process_waiting_requests(self, recv_reqs):
260+
pass
261+
262+
@abstractmethod
263+
async def recv_mm_data(self, img_data, mm_processor, prompt):
264+
pass
265+
266+
@abstractmethod
267+
def send_encode_request(self, obj):
268+
pass
269+
270+
271+
class MMReceiverHTTP(MMReceiverBase):
245272

246273
def __init__(
247274
self,
@@ -602,7 +629,7 @@ async def recv_mm_data(self, img_data, mm_processor, prompt):
602629

603630
# For zmq_to_tokenizer and mooncake
604631
async def _recv_mm_data(self, req_id, recv_socket, mm_processor, prompt):
605-
# Bypass MMReceiver
632+
# Bypass MMReceiverHTTP
606633
if req_id is None:
607634
return None
608635

python/sglang/srt/managers/scheduler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from sglang.srt.disaggregation.decode_kvcache_offload_manager import (
4444
DecodeKVCacheOffloadManager,
4545
)
46-
from sglang.srt.disaggregation.encode_receiver import MMReceiver
46+
from sglang.srt.disaggregation.encode_receiver import MMReceiverHTTP
4747
from sglang.srt.disaggregation.prefill import (
4848
PrefillBootstrapQueue,
4949
SchedulerDisaggregationPrefillMixin,
@@ -949,7 +949,7 @@ def init_disaggregation(self):
949949
self.server_args.language_only
950950
and self.server_args.encoder_transfer_backend == "zmq_to_scheduler"
951951
):
952-
self.mm_receiver = MMReceiver(
952+
self.mm_receiver = MMReceiverHTTP(
953953
self.server_args,
954954
hf_config=self.model_config.hf_config,
955955
pp_rank=self.pp_rank,

python/sglang/srt/managers/tokenizer_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from fastapi import BackgroundTasks
4040

4141
from sglang.srt.configs.model_config import ModelConfig
42-
from sglang.srt.disaggregation.encode_receiver import MMReceiver
42+
from sglang.srt.disaggregation.encode_receiver import MMReceiverHTTP
4343
from sglang.srt.disaggregation.utils import DisaggregationMode
4444
from sglang.srt.environ import envs
4545
from sglang.srt.lora.lora_registry import LoRARef, LoRARegistry
@@ -422,7 +422,7 @@ def init_disaggregation(self):
422422

423423
# Encoder Disaggregation
424424
if self.server_args.language_only:
425-
self.mm_receiver = MMReceiver(
425+
self.mm_receiver = MMReceiverHTTP(
426426
self.server_args,
427427
dtype=self.model_config.dtype,
428428
)

0 commit comments

Comments
 (0)