Skip to content

Commit 5875ef0

Browse files
authored
Clean up noisy startup log messages and refactor loader.py (sgl-project#18531)
1 parent ded068a commit 5875ef0

File tree

7 files changed

+60
-43
lines changed

7 files changed

+60
-43
lines changed

python/sglang/launch_server.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
from sglang.srt.server_args import prepare_server_args
88
from sglang.srt.utils import kill_process_tree
9+
from sglang.srt.utils.common import suppress_noisy_warnings
10+
11+
suppress_noisy_warnings()
912

1013

1114
def run_server(server_args):

python/sglang/srt/entrypoints/openai/serving_chat.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def _extract_max_dynamic_patch(request: ChatCompletionRequest):
8787
class OpenAIServingChat(OpenAIServingBase):
8888
"""Handler for /v1/chat/completions requests"""
8989

90+
_default_sampling_params_logged = False
91+
9092
def __init__(
9193
self,
9294
tokenizer_manager: TokenizerManager,
@@ -101,10 +103,14 @@ def __init__(
101103
self.default_sampling_params = (
102104
self.tokenizer_manager.model_config.get_default_sampling_params()
103105
)
104-
if self.default_sampling_params:
106+
if (
107+
self.default_sampling_params
108+
and not OpenAIServingChat._default_sampling_params_logged
109+
):
105110
logger.info(
106111
f"Using default chat sampling params from model generation config: {self.default_sampling_params}",
107112
)
113+
OpenAIServingChat._default_sampling_params_logged = True
108114

109115
# Check if the model is a GPT-OSS model
110116
self.is_gpt_oss = (

python/sglang/srt/layers/moe/utils.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
get_attention_dp_size,
1111
is_dp_attention_enabled,
1212
)
13-
from sglang.srt.utils import log_info_on_rank0
1413

1514
if TYPE_CHECKING:
1615
from sglang.srt.server_args import ServerArgs
@@ -189,10 +188,6 @@ def get_moe_a2a_backend() -> MoeA2ABackend:
189188
def get_moe_runner_backend() -> MoeRunnerBackend:
190189
global MOE_RUNNER_BACKEND
191190
if MOE_RUNNER_BACKEND is None:
192-
log_info_on_rank0(
193-
logger,
194-
"MOE_RUNNER_BACKEND is not initialized, the backend will be automatically selected",
195-
)
196191
MOE_RUNNER_BACKEND = MoeRunnerBackend.AUTO
197192
return MOE_RUNNER_BACKEND
198193

python/sglang/srt/mem_cache/mamba_radix_cache.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,6 @@ def __init__(self, params: CacheInitParams):
384384
assert (
385385
self.page_size == 1
386386
), f"Page size must be 1 for MambaRadixCache v1, got {self.page_size}"
387-
else:
388-
logger.info(f"Mamba extra_buffer is enabled.")
389387

390388
if self.token_to_kv_pool_allocator:
391389
self.device = self.token_to_kv_pool_allocator.device

python/sglang/srt/model_executor/model_runner.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,8 +1684,6 @@ def init_cublas(self):
16841684

16851685
def init_attention_backend(self):
16861686
"""Init attention kernel backend."""
1687-
tic = time.perf_counter()
1688-
logger.info("Init attention backend begin.")
16891687
if self.server_args.enable_pdmux:
16901688
self.attn_backend = self._get_attention_backend(init_new_workspace=True)
16911689
self.decode_attn_backend_group = []
@@ -1696,9 +1694,6 @@ def init_attention_backend(self):
16961694
self.attn_backend = TboAttnBackend.init_new(self._get_attention_backend)
16971695
else:
16981696
self.attn_backend = self._get_attention_backend()
1699-
logger.info(
1700-
f"Init attention backend end. elapsed={time.perf_counter() - tic:.2f} s"
1701-
)
17021697

17031698
def _get_attention_backend(self, init_new_workspace: bool = False):
17041699
"""Init attention kernel backend."""

python/sglang/srt/model_loader/loader.py

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import logging
1313
import math
1414
import os
15+
import re
1516
import socket
1617
import threading
1718
import time
@@ -304,6 +305,8 @@ class DefaultModelLoader(BaseModelLoader):
304305
# default number of thread when enable multithread weight loading
305306
DEFAULT_NUM_THREADS = 8
306307

308+
_MTP_PATTERN = re.compile(r"model\.mtp\.layers\.(\d+)\.")
309+
307310
@dataclasses.dataclass
308311
class Source:
309312
"""A source for weights."""
@@ -351,11 +354,11 @@ def __init__(self, load_config: LoadConfig):
351354

352355
def _maybe_download_from_modelscope(
353356
self, model: str, revision: Optional[str]
354-
) -> Optional[str]:
357+
) -> str:
355358
"""Download model from ModelScope hub if SGLANG_USE_MODELSCOPE is True.
356359
357-
Returns the path to the downloaded model, or None if the model is not
358-
downloaded from ModelScope."""
360+
Returns the path to the downloaded model, or the original model path if
361+
not downloaded from ModelScope."""
359362
if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
360363
# download model from ModelScope hub,
361364
# lazy import so that modelscope is not required for normal use.
@@ -373,17 +376,16 @@ def _maybe_download_from_modelscope(
373376
else:
374377
model_path = model
375378
return model_path
376-
return None
379+
return model
377380

378381
def _prepare_weights(
379382
self, model_name_or_path: str, revision: Optional[str], fall_back_to_pt: bool
380383
) -> Tuple[str, List[str], bool]:
381384
"""Prepare weights for the model.
382385
383386
If the model is not local, it will be downloaded."""
384-
model_name_or_path = (
385-
self._maybe_download_from_modelscope(model_name_or_path, revision)
386-
or model_name_or_path
387+
model_name_or_path = self._maybe_download_from_modelscope(
388+
model_name_or_path, revision
387389
)
388390

389391
is_local = os.path.isdir(model_name_or_path)
@@ -474,6 +476,7 @@ def _get_weights_iterator(
474476
) -> Generator[Tuple[str, torch.Tensor], None, None]:
475477
"""Get an iterator for the model weights based on the load format."""
476478
extra_config = self.load_config.model_loader_extra_config
479+
use_multithread = extra_config.get("enable_multithread_load", False)
477480
hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
478481
source.model_or_path, source.revision, source.fall_back_to_pt
479482
)
@@ -504,7 +507,7 @@ def _get_weights_iterator(
504507
weights_iterator = fastsafetensors_weights_iterator(
505508
hf_weights_files,
506509
)
507-
elif extra_config.get("enable_multithread_load"):
510+
elif use_multithread:
508511
weights_iterator = multi_thread_safetensors_weights_iterator(
509512
hf_weights_files,
510513
max_workers=extra_config.get(
@@ -518,7 +521,7 @@ def _get_weights_iterator(
518521
)
519522

520523
else:
521-
if extra_config.get("enable_multithread_load"):
524+
if use_multithread:
522525
weights_iterator = multi_thread_pt_weights_iterator(
523526
hf_weights_files,
524527
max_workers=extra_config.get(
@@ -529,28 +532,34 @@ def _get_weights_iterator(
529532
weights_iterator = pt_weights_iterator(hf_weights_files)
530533

531534
if self.load_config.draft_model_idx is not None:
532-
import re
533-
534-
pattern = r"model.mtp.layers.(\d+)."
535-
filtered_weights = []
536-
for name, tensor in weights_iterator:
537-
group = re.match(pattern, name)
538-
if group is not None:
539-
idx = int(group.group(1))
540-
if idx != self.load_config.draft_model_idx:
541-
continue
542-
new_name = name.replace(group.group(), "model.mtp.layers.0.")
543-
else:
544-
new_name = name
545-
filtered_weights.append((source.prefix + new_name, tensor))
546-
return tuple(filtered_weights)
535+
return self._filter_mtp_weights(
536+
weights_iterator, source.prefix, self.load_config.draft_model_idx
537+
)
547538

548539
if self.counter_before_loading_weights == 0.0:
549-
logger.info("Beginning to load weights")
550540
self.counter_before_loading_weights = time.perf_counter()
551541
# Apply the prefix.
552542
return ((source.prefix + name, tensor) for (name, tensor) in weights_iterator)
553543

544+
@classmethod
545+
def _filter_mtp_weights(
546+
cls, weights_iterator, prefix: str, draft_model_idx: int
547+
) -> Tuple[Tuple[str, torch.Tensor], ...]:
548+
"""Filter MTP (Multi-Token Prediction) weights to keep only the
549+
specified draft model layer and remap it to layer 0."""
550+
filtered_weights = []
551+
for name, tensor in weights_iterator:
552+
match = cls._MTP_PATTERN.match(name)
553+
if match is not None:
554+
idx = int(match.group(1))
555+
if idx != draft_model_idx:
556+
continue
557+
new_name = name.replace(match.group(), "model.mtp.layers.0.")
558+
else:
559+
new_name = name
560+
filtered_weights.append((prefix + new_name, tensor))
561+
return tuple(filtered_weights)
562+
554563
def _get_all_weights(
555564
self,
556565
model_config: ModelConfig,
@@ -670,10 +679,6 @@ def load_model(
670679
)
671680

672681
self.counter_after_loading_weights = time.perf_counter()
673-
logger.info(
674-
"Loading weights took %.2f seconds",
675-
self.counter_after_loading_weights - self.counter_before_loading_weights,
676-
)
677682
return model.eval()
678683

679684
@staticmethod

python/sglang/srt/utils/common.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1057,10 +1057,25 @@ def uniform_sample(l, n):
10571057
return frames
10581058

10591059

1060-
def suppress_other_loggers():
1060+
def suppress_noisy_warnings():
1061+
"""Suppress known noisy warnings from third-party libraries."""
10611062
warnings.filterwarnings(
10621063
"ignore", category=UserWarning, message="The given NumPy array is not writable"
10631064
)
1065+
warnings.filterwarnings(
1066+
"ignore",
1067+
message="The cuda.cudart module is deprecated",
1068+
category=FutureWarning,
1069+
)
1070+
warnings.filterwarnings(
1071+
"ignore",
1072+
message="The cuda.nvrtc module is deprecated",
1073+
category=FutureWarning,
1074+
)
1075+
1076+
1077+
def suppress_other_loggers():
1078+
suppress_noisy_warnings()
10641079

10651080
try:
10661081
from vllm.logger import logger as vllm_default_logger

0 commit comments

Comments (0)