Unverified commit a7b3e330 authored by Woosuk Kwon, committed by GitHub

[Fix] Fix RoPE in ChatGLM-32K (#1841)

parent e19a64c7
@@ -75,12 +75,17 @@ class GLMAttention(nn.Module):
             linear_method=linear_method,
         )
+        # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
+        rope_ratio = getattr(config, "rope_ratio", 1.0)
+        max_positions = getattr(config, "seq_length", 8192)
         self.attn = PagedAttentionWithRoPE(
             self.num_heads,
             self.head_dim,
             self.scaling,
             rotary_dim=self.head_dim // 2,
             num_kv_heads=self.num_kv_heads,
+            max_position=max_positions,
+            base=10000 * rope_ratio,
             is_neox_style=False,
         )
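For context, here is a minimal sketch (not part of this commit, and not vLLM API) of what scaling the RoPE base by rope_ratio does: the long-context ChatGLM checkpoints set rope_ratio in their config, and multiplying the default base of 10000 by that ratio lowers the rotation frequencies, which is how the model stretches its positional encoding to a longer context. The helper name and the example rope_ratio value below are illustrative.

import torch

def chatglm_rope_inv_freq(rotary_dim: int, rope_ratio: float = 1.0,
                          base: float = 10000.0) -> torch.Tensor:
    # Illustrative helper: compute RoPE inverse frequencies with the base
    # stretched by rope_ratio, mirroring base=10000 * rope_ratio in the diff
    # above. A larger effective base means slower rotations per position,
    # so distant tokens stay distinguishable over a longer context.
    effective_base = base * rope_ratio
    exponents = torch.arange(0, rotary_dim, 2, dtype=torch.float32) / rotary_dim
    return 1.0 / (effective_base ** exponents)

# Default short-context model (rope_ratio == 1.0) vs. a long-context
# checkpoint that sets rope_ratio > 1 in its config (value illustrative).
inv_freq_base = chatglm_rope_inv_freq(64)
inv_freq_long = chatglm_rope_inv_freq(64, rope_ratio=50.0)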