Unverified Commit 3c93187c authored by TianyiQ's avatar TianyiQ Committed by GitHub
Browse files

Add support for tie_word_embeddings when loading weights + support for SmolLM (#1508)

parent fb2d0680
......@@ -263,6 +263,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
- BaiChuan2
- MiniCPM / MiniCPM 3
- XVERSE / XVERSE MoE
- SmolLM
**Embedding Models**
......
......@@ -403,6 +403,14 @@ class LlamaForCausalLM(nn.Module):
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight)
if (
hasattr(self.config, "tie_word_embeddings")
and self.config.tie_word_embeddings
):
# Tie output embedding layer to input embedding layer, to solve issues where lm_head.weight is missing
param = self.lm_head.weight
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, self.model.embed_tokens.weight)
apply_torchao_config_(self, params_dict, set(["proj.weight"]))
......
......@@ -51,6 +51,7 @@ CI_MODELS = [
# All other models
ALL_OTHER_MODELS = [
ModelCase("Qwen/Qwen2-1.5B"),
ModelCase("HuggingFaceTB/SmolLM-135M-Instruct"),
]
TORCH_DTYPES = [torch.float16]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment