Unverified Commit ba45bedf authored by hujiaxin0's avatar hujiaxin0 Committed by GitHub
Browse files

[model] Add support for openPangu7B-VL (#32449)


Signed-off-by: hujiaxin <524446785@qq.com>
Signed-off-by: Emilie1001 <79921183+Emilie1001@users.noreply.github.com>
Co-authored-by: Emilie1001 <79921183+Emilie1001@users.noreply.github.com>
parent 9432ed8c
...@@ -705,6 +705,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen ...@@ -705,6 +705,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Molmo2ForConditionalGeneration` | Molmo2 | T + I<sup>+</sup> / V | `allenai/Molmo2-4B`, `allenai/Molmo2-8B`, `allenai/Molmo2-O-7B` | ✅︎ | ✅︎ | | `Molmo2ForConditionalGeneration` | Molmo2 | T + I<sup>+</sup> / V | `allenai/Molmo2-4B`, `allenai/Molmo2-8B`, `allenai/Molmo2-O-7B` | ✅︎ | ✅︎ |
| `NVLM_D_Model` | NVLM-D 1.0 | T + I<sup>+</sup> | `nvidia/NVLM-D-72B`, etc. | | ✅︎ | | `NVLM_D_Model` | NVLM-D 1.0 | T + I<sup>+</sup> | `nvidia/NVLM-D-72B`, etc. | | ✅︎ |
| `OpenCUAForConditionalGeneration` | OpenCUA-7B | T + I<sup>E+</sup> | `xlangai/OpenCUA-7B` | ✅︎ | ✅︎ | | `OpenCUAForConditionalGeneration` | OpenCUA-7B | T + I<sup>E+</sup> | `xlangai/OpenCUA-7B` | ✅︎ | ✅︎ |
| `OpenPanguVLForConditionalGeneration` | openPangu-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `FreedomIntelligence/openPangu-VL-7B` | ✅︎ | ✅︎ |
| `Ovis` | Ovis2, Ovis1.6 | T + I<sup>+</sup> | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc. | | ✅︎ | | `Ovis` | Ovis2, Ovis1.6 | T + I<sup>+</sup> | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc. | | ✅︎ |
| `Ovis2_5` | Ovis2.5 | T + I<sup>+</sup> + V | `AIDC-AI/Ovis2.5-9B`, etc. | | | | `Ovis2_5` | Ovis2.5 | T + I<sup>+</sup> + V | `AIDC-AI/Ovis2.5-9B`, etc. | | |
| `PaddleOCRVLForConditionalGeneration` | Paddle-OCR | T + I<sup>+</sup> | `PaddlePaddle/PaddleOCR-VL`, etc. | | | | `PaddleOCRVLForConditionalGeneration` | Paddle-OCR | T + I<sup>+</sup> | `PaddlePaddle/PaddleOCR-VL`, etc. | | |
......
...@@ -1394,6 +1394,37 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData: ...@@ -1394,6 +1394,37 @@ def run_nvlm_d(questions: list[str], modality: str) -> ModelRequestData:
) )
# OpenPangu
def run_openpangu_vl(questions: list[str], modality: str) -> ModelRequestData:
    """Build the engine arguments and prompts for the openPangu-VL example.

    Args:
        questions: Questions to ask; one prompt is produced per question.
        modality: Either ``"image"`` or ``"video"``. It selects the
            placeholder token inserted into the prompt and is used as the
            key of ``limit_mm_per_prompt``.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments and the list of
        formatted prompts.

    Raises:
        ValueError: If ``modality`` is neither ``"image"`` nor ``"video"``.
    """
    # Resolve the placeholder first so an unsupported modality fails with a
    # clear message instead of an UnboundLocalError when the prompt is built.
    if modality == "image":
        placeholder = "[unused19]"
    elif modality == "video":
        placeholder = "[unused32]"
    else:
        raise ValueError(
            f"Unsupported modality for openPangu-VL: {modality!r} "
            "(expected 'image' or 'video')."
        )

    model_name = "FreedomIntelligence/openPangu-VL-7B"

    engine_args = EngineArgs(
        model=model_name,
        max_model_len=4096,
        max_num_seqs=4,
        trust_remote_code=True,
        enforce_eager=True,
        limit_mm_per_prompt={modality: 1},
    )

    # The placeholder is wrapped between [unused18] and [unused20] and placed
    # ahead of the question inside the user turn.
    prompts = [
        (
            f"<s>[unused9]系统：[unused10][unused9]用户：[unused18]{placeholder}[unused20]{question}[unused10][unused9]助手："
        )
        for question in questions
    ]

    return ModelRequestData(
        engine_args=engine_args,
        prompts=prompts,
    )
# Ovis # Ovis
def run_ovis(questions: list[str], modality: str) -> ModelRequestData: def run_ovis(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -2051,6 +2082,7 @@ model_example_map = { ...@@ -2051,6 +2082,7 @@ model_example_map = {
"molmo2": run_molmo2, "molmo2": run_molmo2,
"nemotron_vl": run_nemotron_vl, "nemotron_vl": run_nemotron_vl,
"NVLM_D": run_nvlm_d, "NVLM_D": run_nvlm_d,
"openpangu_vl": run_openpangu_vl,
"ovis": run_ovis, "ovis": run_ovis,
"ovis2_5": run_ovis2_5, "ovis2_5": run_ovis2_5,
"paddleocr_vl": run_paddleocr_vl, "paddleocr_vl": run_paddleocr_vl,
......
...@@ -765,6 +765,32 @@ def load_nvlm_d(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -765,6 +765,32 @@ def load_nvlm_d(question: str, image_urls: list[str]) -> ModelRequestData:
) )
# OpenPangu
def load_openpangu_vl(question: str, image_urls: list[str]) -> ModelRequestData:
    """Build the multi-image request for the openPangu-VL example.

    Args:
        question: The question placed in the user turn of the prompt.
        image_urls: URLs of the images to attach; one placeholder group is
            appended to the prompt per URL.

    Returns:
        A ``ModelRequestData`` with the engine arguments, the single prompt
        and the images fetched from ``image_urls``.
    """
    num_images = len(image_urls)

    engine_args = EngineArgs(
        model="FreedomIntelligence/openPangu-VL-7B",
        trust_remote_code=True,
        max_model_len=8192,
        max_num_seqs=2,
        enforce_eager=True,
        limit_mm_per_prompt={"image": num_images},
    )

    # One "[unused18][unused19][unused20]" group per image, appended after
    # the question text.
    image_slots = "".join("[unused18][unused19][unused20]" for _ in image_urls)
    user_turn = f"<s>[unused9]系统：[unused10][unused9]用户：{question}{image_slots}"
    prompt = user_turn + "[unused10][unused9]助手："

    images = []
    for url in image_urls:
        images.append(fetch_image(url))

    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=images,
    )
# Ovis # Ovis
def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData: def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "AIDC-AI/Ovis2-1B" model_name = "AIDC-AI/Ovis2-1B"
...@@ -1388,6 +1414,7 @@ model_example_map = { ...@@ -1388,6 +1414,7 @@ model_example_map = {
"mistral3": load_mistral3, "mistral3": load_mistral3,
"molmo2": load_molmo2, "molmo2": load_molmo2,
"NVLM_D": load_nvlm_d, "NVLM_D": load_nvlm_d,
"openpangu_vl": load_openpangu_vl,
"ovis": load_ovis, "ovis": load_ovis,
"ovis2_5": load_ovis2_5, "ovis2_5": load_ovis2_5,
"paddleocr_vl": load_paddleocr_vl, "paddleocr_vl": load_paddleocr_vl,
......
...@@ -873,6 +873,12 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -873,6 +873,12 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"OpenCUAForConditionalGeneration": _HfExamplesInfo( "OpenCUAForConditionalGeneration": _HfExamplesInfo(
"xlangai/OpenCUA-7B", trust_remote_code=True "xlangai/OpenCUA-7B", trust_remote_code=True
), ),
"OpenPanguVLForConditionalGeneration": _HfExamplesInfo(
"FreedomIntelligence/openPangu-VL-7B",
trust_remote_code=True,
max_model_len=4096,
enforce_eager=True,
),
"Ovis": _HfExamplesInfo( "Ovis": _HfExamplesInfo(
"AIDC-AI/Ovis2-1B", "AIDC-AI/Ovis2-1B",
trust_remote_code=True, trust_remote_code=True,
......
...@@ -15,6 +15,7 @@ from .linear_scaling_rope import LinearScalingRotaryEmbedding ...@@ -15,6 +15,7 @@ from .linear_scaling_rope import LinearScalingRotaryEmbedding
from .llama3_rope import Llama3RotaryEmbedding from .llama3_rope import Llama3RotaryEmbedding
from .llama4_vision_rope import Llama4VisionRotaryEmbedding from .llama4_vision_rope import Llama4VisionRotaryEmbedding
from .mrope import MRotaryEmbedding from .mrope import MRotaryEmbedding
from .mrope_interleaved import MRotaryEmbeddingInterleaved
from .ntk_scaling_rope import NTKScalingRotaryEmbedding from .ntk_scaling_rope import NTKScalingRotaryEmbedding
from .phi3_long_rope_scaled_rope import Phi3LongRoPEScaledRotaryEmbedding from .phi3_long_rope_scaled_rope import Phi3LongRoPEScaledRotaryEmbedding
from .xdrope import XDRotaryEmbedding from .xdrope import XDRotaryEmbedding
...@@ -283,6 +284,21 @@ def get_rope( ...@@ -283,6 +284,21 @@ def get_rope(
long_factor, long_factor,
**extra_kwargs, **extra_kwargs,
) )
elif scaling_type == "openpangu":
mrope_interleaved = rope_parameters.get("mrope_interleaved", False)
if "mrope_section" in rope_parameters and mrope_interleaved:
rotary_emb = MRotaryEmbeddingInterleaved(
head_size,
rotary_dim,
max_position,
base,
is_neox_style,
dtype,
mrope_section=rope_parameters["mrope_section"],
mrope_interleaved=mrope_interleaved,
)
else:
raise ValueError("Pangu mrope lacks necessary parameters.")
else: else:
raise ValueError(f"Unknown RoPE scaling type {scaling_type}") raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
_ROPE_DICT[key] = rotary_emb _ROPE_DICT[key] = rotary_emb
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Adapted from vllm/model_executor/layers/rotary_embedding/__init__.py
# Copyright 2023 The vLLM team.
#
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from .mrope import MRotaryEmbedding
# MRotaryEmbedding with interleaved
class MRotaryEmbeddingInterleaved(MRotaryEmbedding):
    """Rotary Embedding with Multimodal Sections and Interleaved Support.

    At construction time a per-channel list (``self.mrope_dim``) is built that
    tells, for every column of the cos/sin cache row, which leading index of
    the gathered cache entries supplies that column. ``forward`` uses it to
    assemble one cos/sin row per token and applies the rotation through the
    native implementation of ``self.apply_rotary_emb``.
    """

    def __init__(
        self,
        head_size: int,
        rotary_dim: int,
        max_position_embeddings: int,
        base: float,
        is_neox_style: bool,
        dtype: torch.dtype,
        mrope_section: list[int],
        mrope_interleaved: bool = True,
    ) -> None:
        """Validate the section layout and precompute the interleave map.

        Args:
            head_size: Size of one attention head.
            rotary_dim: Number of leading head channels that are rotated.
            max_position_embeddings: Forwarded to the base class as the
                maximum position count.
            base: Forwarded to the base class.
            is_neox_style: Forwarded to the base class.
            dtype: Forwarded to the base class.
            mrope_section: Two or three section sizes whose sum must equal
                ``rotary_dim // 2``.
            mrope_interleaved: Must be truthy.

        Raises:
            ValueError: If ``mrope_section`` is ``None``, if its sum differs
                from ``rotary_dim // 2``, or if ``mrope_interleaved`` is falsy.
            AssertionError: If ``mrope_section`` has a length other than 2
                or 3.
        """
        # The value is passed through unchanged by this class.
        # NOTE(review): any enlargement for video inputs would have to happen
        # in the base class, which is not visible here - confirm.
        self.cache_max_position_num = max_position_embeddings
        super().__init__(
            head_size,
            rotary_dim,
            self.cache_max_position_num,
            base,
            is_neox_style,
            dtype,
        )

        self.mrope_section = mrope_section
        self.mrope_interleaved = mrope_interleaved

        if self.mrope_section is None:
            raise ValueError("mrope_section cannot be None.")
        if sum(self.mrope_section) != rotary_dim // 2:
            raise ValueError("Sum of mrope_section must equal rotary_dim // 2.")
        if not self.mrope_interleaved:
            raise ValueError(
                "mrope_interleaved must be True when mrope_section is provided."
            )

        # Generate interleaved indices
        if len(mrope_section) == 2:
            h_num, w_num = mrope_section[0], mrope_section[1]
            mrope_dim = self.get_mrope_interleaved_id_list(h_num, w_num, 0)
        elif len(mrope_section) == 3:
            t_num, h_num, w_num = mrope_section[0], mrope_section[1], mrope_section[2]
            mrope_dim = self.get_mrope_interleaved_id_list(
                t_num, h_num, w_num, force_last=True
            )
        else:
            raise AssertionError(
                "Cannot support the length of mrope section is not 2 or 3."
            )

        # Repeat the id list so it covers both halves of a cache row, which
        # forward() later separates with chunk(2, dim=-1).
        mrope_dim = mrope_dim * 2
        self.mrope_dim = mrope_dim
        self.layer_cache = None

    def _rebuild_pos_emb(
        self,
        positions: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Interleave the rotary embedding.

        Args:
            positions: Indices into ``self.cos_sin_cache``.
                NOTE(review): presumably shaped (num_axes, num_tokens) so the
                gathered entries are (num_axes, num_tokens, rotary_dim) -
                confirm against the caller.

        Returns:
            A tuple ``(cos_sin, token_ids)`` where ``cos_sin`` concatenates,
            column by column, the slice selected by ``self.mrope_dim`` and
            ``token_ids`` is ``arange(cos_sin.shape[0])`` on the device of
            ``positions``.
        """
        cos_sin = self.cos_sin_cache[positions]
        mrope_dim = self.mrope_dim
        # Split the last dimension into single-column pieces, one per entry
        # of mrope_dim.
        unit_widths = [1] * len(mrope_dim)
        cos_sin = torch.cat(
            [
                # For column i keep only the leading-index slice named by
                # mrope_dim[i].
                column[mrope_dim[i]]
                for i, column in enumerate(cos_sin.split(unit_widths, dim=-1))
            ],
            dim=-1,
        )
        return cos_sin, torch.arange(cos_sin.shape[0], device=positions.device)

    def forward(
        self,
        positions: torch.Tensor,
        query: torch.Tensor,
        key: torch.Tensor | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor | None]:
        """Forward pass with interleaved rotary embedding.

        Args:
            positions: Position indices, forwarded to ``_rebuild_pos_emb``.
            query: Query tensor; viewed as (num_tokens, -1, head_size) and
                returned in its original shape.
            key: Optional key tensor handled the same way as ``query``.

        Returns:
            The rotated query and the rotated key (``None`` when no key was
            given).
        """
        cos_sin, positions = self._rebuild_pos_emb(positions)
        cos, sin = cos_sin.chunk(2, dim=-1)

        query_shape = query.shape
        positions = positions.flatten()
        num_tokens = positions.shape[0]

        # Only the first rotary_dim channels of every head are rotated; the
        # remaining channels are passed through untouched.
        query = query.view(num_tokens, -1, self.head_size)
        query_rot = query[..., : self.rotary_dim]
        query_pass = query[..., self.rotary_dim :]
        query_rot = self.apply_rotary_emb.forward_native(
            query_rot,
            cos,
            sin,
        )
        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)

        # key may be None in some cases, e.g. cross-layer KV sharing
        if key is not None:
            key_shape = key.shape
            key = key.view(num_tokens, -1, self.head_size)
            key_rot = key[..., : self.rotary_dim]
            key_pass = key[..., self.rotary_dim :]
            key_rot = self.apply_rotary_emb.forward_native(
                key_rot,
                cos,
                sin,
            )
            key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
        return query, key

    @staticmethod
    def get_mrope_interleaved_id_list(
        a: int, b: int, c: int, force_last: bool = False
    ) -> list[int]:
        """
        Generate an interleaved list of indices for multi-modal rotary embedding.

        Ids are placed greedily: at each step the id that differs from the
        previous one and has the smallest placed/total ratio is chosen, ties
        going to the smaller id. For example ``(2, 2, 2)`` yields
        ``[0, 1, 2, 0, 1, 2]``.

        Args:
            a: Number of indices for first modality
            b: Number of indices for second modality
            c: Number of indices for third modality
            force_last: Whether to force the last element to be from the first modality

        Returns:
            List of interleaved indices
        """
        if force_last:
            # Hold one slot of the first modality back; it is appended at the
            # very end of the sequence.
            a -= 1

        counts = {0: a, 1: b, 2: c}
        placed = {k: 0 for k in counts}
        rem = counts.copy()
        seq: list[int] = []
        last = None

        total = a + b + c
        for _ in range(total):
            # Candidates: remaining > 0 and != last
            cands = [k for k in rem if rem[k] > 0 and k != last]
            if not cands:
                # If only last remains, relax the condition
                cands = [k for k in rem if rem[k] > 0]

            # Select the rarest candidate. Every candidate is a key of both
            # `placed` and `counts` and has counts[k] >= rem[k] > 0, so the
            # lookup and the division cannot fail.
            best = min(cands, key=lambda k: (placed[k] / counts[k], k))

            seq.append(best)
            placed[best] += 1
            rem[best] -= 1
            last = best

        if force_last:
            seq.append(0)

        return seq
...@@ -537,10 +537,16 @@ class OpenPanguEmbeddedAttention(nn.Module): ...@@ -537,10 +537,16 @@ class OpenPanguEmbeddedAttention(nn.Module):
if is_gguf and config.model_type == "PanguEmbedded": if is_gguf and config.model_type == "PanguEmbedded":
is_neox_style = False is_neox_style = False
rope_parameters = config.rope_parameters or {}
if rope_parameters is not None and rope_parameters.get(
"mrope_interleaved", False
):
rope_parameters["rope_type"] = "openpangu"
self.rotary_emb = get_rope( self.rotary_emb = get_rope(
self.head_dim, self.head_dim,
max_position=self.max_position_embeddings, max_position=self.max_position_embeddings,
rope_parameters=config.rope_parameters, rope_parameters=rope_parameters,
is_neox_style=is_neox_style, is_neox_style=is_neox_style,
) )
......
This diff is collapsed.
...@@ -405,6 +405,10 @@ _MULTIMODAL_MODELS = { ...@@ -405,6 +405,10 @@ _MULTIMODAL_MODELS = {
"MolmoForCausalLM": ("molmo", "MolmoForCausalLM"), "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
"Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"), "Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"),
"NVLM_D": ("nvlm_d", "NVLM_D_Model"), "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
"OpenPanguVLForConditionalGeneration": (
"openpangu_vl",
"OpenPanguVLForConditionalGeneration",
),
"Ovis": ("ovis", "Ovis"), "Ovis": ("ovis", "Ovis"),
"Ovis2_5": ("ovis2_5", "Ovis2_5"), "Ovis2_5": ("ovis2_5", "Ovis2_5"),
"PaddleOCRVLForConditionalGeneration": ( "PaddleOCRVLForConditionalGeneration": (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment