"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "57e9bf18642a391e918400a5afc7c01221635698"
Commit 7f3dec12 authored by zhuwenwen
Browse files

[Bugfix] Fix GLM rotary_dim issue and support v1

Fix glm4.py residual bug
parent b956dfd6
...@@ -3,13 +3,13 @@ ...@@ -3,13 +3,13 @@
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.model_executor.models.llama import LlamaForCausalLM from vllm.model_executor.models.llama import LlamaForCausalLM
from .interfaces import SupportsV0Only
from .utils import PPMissingLayer from .utils import PPMissingLayer
class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only): class GlmForCausalLM(LlamaForCausalLM):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
vllm_config.model_config.hf_config.partial_rotary_factor = 0.5
super().__init__(vllm_config=vllm_config, prefix=prefix) super().__init__(vllm_config=vllm_config, prefix=prefix)
# Hack Llama model to fit HF format GLM implementation # Hack Llama model to fit HF format GLM implementation
# Attention difference between GLM and Llama: # Attention difference between GLM and Llama:
...@@ -17,7 +17,6 @@ class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only): ...@@ -17,7 +17,6 @@ class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only):
# 2. There is no bias for o_proj in attention # 2. There is no bias for o_proj in attention
for layer in self.model.layers: for layer in self.model.layers:
if not isinstance(layer, PPMissingLayer): if not isinstance(layer, PPMissingLayer):
layer.self_attn.rotary_emb.rotary_dim //= 2
layer.self_attn.rotary_emb.is_neox_style = False layer.self_attn.rotary_emb.is_neox_style = False
layer.self_attn.o_proj.bias = None layer.self_attn.o_proj.bias = None
layer.self_attn.o_proj.skip_bias_add = True layer.self_attn.o_proj.skip_bias_add = True
...@@ -200,8 +200,8 @@ class Glm4DecoderLayer(nn.Module): ...@@ -200,8 +200,8 @@ class Glm4DecoderLayer(nn.Module):
hidden_states = self.post_self_attn_layernorm(hidden_states) hidden_states = self.post_self_attn_layernorm(hidden_states)
# Fully Connected # Fully Connected
hidden_states, residual = self.post_attention_layernorm( residual = hidden_states
hidden_states, residual) hidden_states = self.post_attention_layernorm(hidden_states)
hidden_states = self.mlp(hidden_states) hidden_states = self.mlp(hidden_states)
hidden_states = self.post_mlp_layernorm(hidden_states) hidden_states = self.post_mlp_layernorm(hidden_states)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment