Commit df704163 authored by zhuwenwen
Browse files

sync v0.15.1 (models)

parent d7db129a
...@@ -732,7 +732,7 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) ...@@ -732,7 +732,7 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -755,4 +755,4 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) ...@@ -755,4 +755,4 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -1438,7 +1438,7 @@ class BaseKeyeModule(nn.Module, SupportsMultiModal): ...@@ -1438,7 +1438,7 @@ class BaseKeyeModule(nn.Module, SupportsMultiModal):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -1692,4 +1692,4 @@ class KeyeForConditionalGeneration( ...@@ -1692,4 +1692,4 @@ class KeyeForConditionalGeneration(
llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1) llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
mrope_position_delta = (llm_positions.max() + 1 - len(input_tokens)).item() mrope_position_delta = (llm_positions.max() + 1 - len(input_tokens)).item()
return llm_positions, mrope_position_delta return llm_positions, mrope_position_delta
\ No newline at end of file
...@@ -506,7 +506,7 @@ class KimiLinearForCausalLM( ...@@ -506,7 +506,7 @@ class KimiLinearForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -666,4 +666,4 @@ def get_spec_layer_idx_from_weight_name( ...@@ -666,4 +666,4 @@ def get_spec_layer_idx_from_weight_name(
for i in range(config.num_nextn_predict_layers): for i in range(config.num_nextn_predict_layers):
if weight_name.startswith(f"model.layers.{layer_idx + i}."): if weight_name.startswith(f"model.layers.{layer_idx + i}."):
return layer_idx + i return layer_idx + i
return None return None
\ No newline at end of file
...@@ -389,7 +389,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -389,7 +389,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -412,4 +412,4 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -412,4 +412,4 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -342,7 +342,7 @@ class Lfm2Model(nn.Module): ...@@ -342,7 +342,7 @@ class Lfm2Model(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -503,7 +503,7 @@ class Lfm2ForCausalLM( ...@@ -503,7 +503,7 @@ class Lfm2ForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -523,4 +523,4 @@ class Lfm2ForCausalLM( ...@@ -523,4 +523,4 @@ class Lfm2ForCausalLM(
self, self,
skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None), skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
) )
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -457,7 +457,7 @@ class Lfm2MoeModel(nn.Module): ...@@ -457,7 +457,7 @@ class Lfm2MoeModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -730,7 +730,7 @@ class Lfm2MoeForCausalLM( ...@@ -730,7 +730,7 @@ class Lfm2MoeForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -753,4 +753,4 @@ class Lfm2MoeForCausalLM( ...@@ -753,4 +753,4 @@ class Lfm2MoeForCausalLM(
return loader.load_weights(weights) return loader.load_weights(weights)
def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
return self.model.get_expert_mapping() return self.model.get_expert_mapping()
\ No newline at end of file
...@@ -769,7 +769,7 @@ class Lfm2VLForConditionalGeneration( ...@@ -769,7 +769,7 @@ class Lfm2VLForConditionalGeneration(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -804,4 +804,4 @@ class Lfm2VLForConditionalGeneration( ...@@ -804,4 +804,4 @@ class Lfm2VLForConditionalGeneration(
language_model="language_model", language_model="language_model",
connector="multi_modal_projector", connector="multi_modal_projector",
tower_model="vision_tower", tower_model="vision_tower",
) )
\ No newline at end of file
...@@ -651,7 +651,7 @@ class LlamaForCausalLM( ...@@ -651,7 +651,7 @@ class LlamaForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -662,7 +662,7 @@ class LlavaForConditionalGeneration( ...@@ -662,7 +662,7 @@ class LlavaForConditionalGeneration(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -853,4 +853,4 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor): ...@@ -853,4 +853,4 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
dummy_inputs=LlavaDummyInputsBuilder, dummy_inputs=LlavaDummyInputsBuilder,
) )
class MantisForConditionalGeneration(LlavaForConditionalGeneration): class MantisForConditionalGeneration(LlavaForConditionalGeneration):
pass pass
\ No newline at end of file
...@@ -509,7 +509,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP ...@@ -509,7 +509,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -577,4 +577,4 @@ model_executor.models.llava_next.LlavaNextProcessingInfo.get_num_image_tokens]. ...@@ -577,4 +577,4 @@ model_executor.models.llava_next.LlavaNextProcessingInfo.get_num_image_tokens].
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
\ No newline at end of file
...@@ -426,7 +426,7 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp ...@@ -426,7 +426,7 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -459,4 +459,4 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp ...@@ -459,4 +459,4 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
# This model doesn't support images for now # This model doesn't support images for now
ignore_unexpected_prefixes=["image_newline"], ignore_unexpected_prefixes=["image_newline"],
) )
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
\ No newline at end of file
...@@ -887,7 +887,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp ...@@ -887,7 +887,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -916,4 +916,4 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp ...@@ -916,4 +916,4 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
\ No newline at end of file
...@@ -520,7 +520,7 @@ class FlashModel(nn.Module): ...@@ -520,7 +520,7 @@ class FlashModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -605,7 +605,7 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -605,7 +605,7 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -764,4 +764,4 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -764,4 +764,4 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
self_attn.kv_a_layernorm.weight.data *= ( self_attn.kv_a_layernorm.weight.data *= (
self.config.hidden_size / self.config.kv_lora_rank self.config.hidden_size / self.config.kv_lora_rank
) ** 0.5 ) ** 0.5
return loaded_params return loaded_params
\ No newline at end of file
...@@ -150,7 +150,7 @@ class LongCatFlashMTP(nn.Module): ...@@ -150,7 +150,7 @@ class LongCatFlashMTP(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
...@@ -345,4 +345,4 @@ class LongCatFlashMTP(nn.Module): ...@@ -345,4 +345,4 @@ class LongCatFlashMTP(nn.Module):
) -> int | None: ) -> int | None:
if "model.mtp" in weight_name: if "model.mtp" in weight_name:
return config.num_hidden_layers * 2 return config.num_hidden_layers * 2
return None return None
\ No newline at end of file
...@@ -142,7 +142,7 @@ class MambaModel(nn.Module): ...@@ -142,7 +142,7 @@ class MambaModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -225,7 +225,7 @@ class MambaForCausalLM( ...@@ -225,7 +225,7 @@ class MambaForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -279,4 +279,4 @@ class MambaForCausalLM( ...@@ -279,4 +279,4 @@ class MambaForCausalLM(
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -137,7 +137,7 @@ class Mamba2Model(nn.Module): ...@@ -137,7 +137,7 @@ class Mamba2Model(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -267,7 +267,7 @@ class Mamba2ForCausalLM( ...@@ -267,7 +267,7 @@ class Mamba2ForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -291,4 +291,4 @@ class Mamba2ForCausalLM( ...@@ -291,4 +291,4 @@ class Mamba2ForCausalLM(
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os import os
from typing import Iterable, List, Optional, Set, Tuple, Any, Dict
from collections.abc import Iterable from collections.abc import Iterable
import torch import torch
...@@ -20,8 +18,6 @@ from .utils import maybe_prefix ...@@ -20,8 +18,6 @@ from .utils import maybe_prefix
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
TOPK=10 # topk for sparse tree (10 is a placeholder and it is sufficient)
class ResidualBlock(nn.Module): class ResidualBlock(nn.Module):
def __init__(self, config: VllmConfig, hidden_size: int, num_layers: int) -> None: def __init__(self, config: VllmConfig, hidden_size: int, num_layers: int) -> None:
......
...@@ -796,7 +796,7 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -796,7 +796,7 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -820,4 +820,4 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -820,4 +820,4 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self) loader = AutoWeightsLoader(self)
return loader.load_weights(weights) return loader.load_weights(weights)
\ No newline at end of file
...@@ -61,7 +61,7 @@ logger = init_logger(__name__) ...@@ -61,7 +61,7 @@ logger = init_logger(__name__)
class MiMoModel(Qwen2Model): class MiMoModel(Qwen2Model):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -185,4 +185,4 @@ class MiMoForCausalLM(Qwen2ForCausalLM, nn.Module): ...@@ -185,4 +185,4 @@ class MiMoForCausalLM(Qwen2ForCausalLM, nn.Module):
) -> torch.Tensor | None: ) -> torch.Tensor | None:
hidden_states = self.model.norm(hidden_states) hidden_states = self.model.norm(hidden_states)
logits = self.logits_processor(self.lm_head, hidden_states) logits = self.logits_processor(self.lm_head, hidden_states)
return logits return logits
\ No newline at end of file
...@@ -169,7 +169,7 @@ class MiMoMTP(nn.Module): ...@@ -169,7 +169,7 @@ class MiMoMTP(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor | None, input_ids: torch.Tensor,
positions: torch.Tensor, positions: torch.Tensor,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
...@@ -291,4 +291,4 @@ class MiMoMTP(nn.Module): ...@@ -291,4 +291,4 @@ class MiMoMTP(nn.Module):
name = name.replace( name = name.replace(
f"model.layers.{spec_layer}.", f"model.layers.{spec_layer}.mtp_block." f"model.layers.{spec_layer}.", f"model.layers.{spec_layer}.mtp_block."
) )
return name return name
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment