Commit 60defb4

Lollipop, liuxiaoming, and JustinTong0323 authored and committed
fix: correct weight loading prefix mapping for Qwen3-VL (sgl-project#18024)
Co-authored-by: liuxiaoming <liuxiaoming@modelbest.cn>
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
1 parent 72765f3 commit 60defb4

File tree

1 file changed: +7 −1 lines changed

python/sglang/srt/models/qwen3_vl.py

Lines changed: 7 additions & 1 deletion
```diff
@@ -959,7 +959,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         name = name.replace(r"model.language_model.", r"model.")
         layer_id = get_layer_id(name)

-        if self.pp_group.is_last_rank and "model.embed_tokens.weight" in name:
+        # Only copy embed_tokens to lm_head when tie_word_embeddings=True
+        # For models with tie_word_embeddings=False (e.g. 8B), lm_head has independent weights
+        if (
+            self.pp_group.is_last_rank
+            and "model.embed_tokens.weight" in name
+            and self.config.tie_word_embeddings
+        ):
             if "lm_head.weight" in params_dict:
                 lm_head_param = params_dict["lm_head.weight"]
                 weight_loader = getattr(
```
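The guard added in the diff can be illustrated with a minimal, self-contained sketch. This is not sglang's actual loader: `Config`, `load_weight`, and the plain-dict `params_dict` are hypothetical stand-ins, and the real code additionally dispatches through a `weight_loader` callable. The point is only the behavioral difference the new `tie_word_embeddings` check makes.

```python
# Hypothetical sketch of the guarded embed_tokens -> lm_head copy.
# Config and load_weight are illustrative names, not sglang APIs.
from dataclasses import dataclass


@dataclass
class Config:
    tie_word_embeddings: bool


def load_weight(name, tensor, params_dict, config, is_last_rank=True):
    """Store a checkpoint tensor; mirror embeddings into lm_head only when tied."""
    if (
        is_last_rank
        and "model.embed_tokens.weight" in name
        and config.tie_word_embeddings
    ):
        # Tied case: lm_head reuses the embedding matrix.
        params_dict["lm_head.weight"] = tensor
    params_dict[name] = tensor


emb = [[0.1, 0.2], [0.3, 0.4]]  # stand-in for a torch.Tensor

params_tied = {}
load_weight("model.embed_tokens.weight", emb, params_tied,
            Config(tie_word_embeddings=True))
assert params_tied["lm_head.weight"] == emb  # copied: embeddings are tied

params_untied = {}
load_weight("model.embed_tokens.weight", emb, params_untied,
            Config(tie_word_embeddings=False))
assert "lm_head.weight" not in params_untied  # lm_head keeps its own weights
```

Before this fix, the copy ran unconditionally, so models shipped with `tie_word_embeddings=False` (e.g. the 8B variant) had their independent `lm_head.weight` overwritten by the embedding matrix.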

0 commit comments

Comments
 (0)