Unverified commit 4b657d32, authored by sixgod, committed by GitHub
Browse files

[Model] Add cogagent model support vLLM (#11742)


Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
parent d697dc01
# Adapted from # Adapted from
# https://github.com/THUDM/GLM-4 # https://github.com/THUDM/CogAgent
"""Inference-only ChatGLM model compatible with THUDM weights.""" """Inference-only CogAgent model compatible with THUDM weights."""
from argparse import Namespace from argparse import Namespace
from array import array from array import array
from typing import (Dict, Iterable, List, Mapping, Optional, Set, Tuple, from typing import (Dict, Iterable, List, Mapping, Optional, Set, Tuple,
...@@ -201,7 +201,6 @@ def input_processor_for_glmv(ctx: InputContext, inputs: DecoderOnlyInputs): ...@@ -201,7 +201,6 @@ def input_processor_for_glmv(ctx: InputContext, inputs: DecoderOnlyInputs):
new_input_ids = [] new_input_ids = []
final_processed_position = 0 final_processed_position = 0
final_processed_position = 0
for boi_position, eoi_position in zip(boi_positions, eoi_positions): for boi_position, eoi_position in zip(boi_positions, eoi_positions):
assert boi_position < eoi_position assert boi_position < eoi_position
...@@ -275,12 +274,15 @@ class GLMAttention(nn.Module): ...@@ -275,12 +274,15 @@ class GLMAttention(nn.Module):
# https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141 # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
rope_ratio = getattr(config, "rope_ratio", 1.0) rope_ratio = getattr(config, "rope_ratio", 1.0)
max_positions = getattr(config, "seq_length", 8192) max_positions = getattr(config, "seq_length", 8192)
# NOTE: THUDM/cogagent-9b-20241220 uses original_rope=False,
# which is equivalent to is_neox_style=True
is_neox_style = not config.original_rope
self.rotary_emb = get_rope( self.rotary_emb = get_rope(
self.head_dim, self.head_dim,
rotary_dim=self.head_dim // 2, rotary_dim=self.head_dim // 2,
max_position=max_positions, max_position=max_positions,
base=10000 * rope_ratio, base=10000 * rope_ratio,
is_neox_style=False, is_neox_style=is_neox_style,
) )
self.attn = Attention(self.num_heads, self.attn = Attention(self.num_heads,
self.head_dim, self.head_dim,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment