Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions python/sglang/srt/layers/quantization/modelopt_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
CombineInput,
StandardDispatchOutput,
)
from sglang.srt.models.utils import WeightsMapper

fp4_quantize = None
try:
Expand Down Expand Up @@ -308,6 +309,22 @@ def get_config_filenames(cls) -> List[str]:
def get_scaled_act_names(self) -> List[str]:
    """Return the names of activations that require scaling.

    ModelOpt quantization schemes scale no activations, so this is
    always an empty list.
    """
    names: List[str] = []
    return names

def apply_weight_name_mapper(
    self, hf_to_sglang_mapper: "WeightsMapper"
):  # noqa: B027
    """Rewrite ``exclude_modules`` patterns from HF layout to sglang layout.

    For every mapped pattern that starts with ``language_model.`` an
    additional alias with that prefix stripped is kept, so patterns match
    both the wrapped and the unwrapped module naming.
    Ref: HF hf_quant_config.json for nvidia/Kimi-K2.5-NVFP4
    https://huggingface.co/nvidia/Kimi-K2.5-NVFP4/blob/main/hf_quant_config.json
    """
    if not self.exclude_modules:
        return
    aliases: List[str] = []
    for mapped_name in hf_to_sglang_mapper.apply_list(self.exclude_modules):
        aliases.append(mapped_name)
        if mapped_name.startswith("language_model."):
            # Keep a prefix-stripped variant alongside the mapped name.
            aliases.append(mapped_name.removeprefix("language_model."))
    # Drop duplicates while preserving first-seen order.
    seen = set()
    self.exclude_modules = [a for a in aliases if not (a in seen or seen.add(a))]


class ModelOptFp8Config(ModelOptQuantConfig):
"""Configuration for ModelOpt FP8 quantization, including serialization and compatibility checks."""
Expand Down
13 changes: 13 additions & 0 deletions python/sglang/srt/models/kimi_k25.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.deepseek_v2 import DeepseekV3ForCausalLM
from sglang.srt.models.kimi_vl_moonvit import MLP2
from sglang.srt.models.utils import WeightsMapper
from sglang.srt.utils import add_prefix

KIMIV_VT_INFER_MAX_PATCH_NUM = 16328
Expand Down Expand Up @@ -643,6 +644,15 @@ def vision_tower_forward_auto(


class KimiK25ForConditionalGeneration(nn.Module):
# Support nvidia/Kimi-K2.5-NVFP4 naming: language_model.layers.*.
# Ref: HF config.json for nvidia/Kimi-K2.5-NVFP4
# https://huggingface.co/nvidia/Kimi-K2.5-NVFP4/blob/main/config.json
hf_to_sglang_mapper = WeightsMapper(
orig_to_new_prefix={
"language_model.layers.": "language_model.model.layers.",
}
)

def __init__(
self,
config: KimiK25Config,
Expand Down Expand Up @@ -710,6 +720,9 @@ def forward(

def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
"""Load weights for the model, separating vision and language weights"""
mapper = getattr(self, "hf_to_sglang_mapper", None)
if mapper is not None:
weights = mapper.apply(weights)
weights = list(weights)

# Separate vision tower weights and language model weights
Expand Down
Loading