Commit c80f5968 authored by 王敏
Browse files

Merge remote-tracking branch 'origin/v0.15.1-dev' into v0.15.1-dev

# Conflicts:
#	vllm/model_executor/layers/fused_moe/config.py
#	vllm/model_executor/layers/fused_moe/layer.py
#	vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_marlin.py
parents 74306deb 530e785f
......@@ -44,10 +44,6 @@ else:
_ProcessorFactories = object
IntermediateTensors = object
if TYPE_CHECKING:
from vllm.config import LoRAConfig, MultiModalConfig, SchedulerConfig
from vllm.sequence import IntermediateTensors
logger = init_logger(__name__)
MultiModalEmbeddings: TypeAlias = list[Tensor] | Tensor | tuple[Tensor, ...]
......@@ -607,8 +603,6 @@ class SupportsPP(Protocol):
def forward(
self,
input_ids: Tensor | None,
positions: Tensor,
*,
intermediate_tensors: IntermediateTensors | None,
) -> IntermediateTensors | None:
......@@ -637,8 +631,6 @@ class _SupportsPPType(Protocol):
def forward(
self,
input_ids: Tensor | None,
positions: Tensor,
*,
intermediate_tensors: IntermediateTensors | None,
) -> Tensor | IntermediateTensors: ...
......@@ -1339,4 +1331,4 @@ def supports_xdrope(model: object) -> TypeIs[SupportsXDRoPE]: ...
def supports_xdrope(
model: type[object] | object,
) -> TypeIs[type[SupportsXDRoPE]] | TypeIs[SupportsXDRoPE]:
return isinstance(model, SupportsXDRoPE)
return isinstance(model, SupportsXDRoPE)
\ No newline at end of file
......@@ -33,8 +33,6 @@ from vllm.model_executor.layers.linear import (
)
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
import vllm.envs as envs
from .vision import run_dp_sharded_vision_model
......@@ -457,4 +455,4 @@ class InternVisionModel(nn.Module):
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight)
loaded_params.add(name)
return loaded_params
return loaded_params
\ No newline at end of file
......@@ -284,7 +284,7 @@ class InternLM2Model(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -350,7 +350,7 @@ class InternLM2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None,
......@@ -446,7 +446,7 @@ class InternLM2ForRewardModel(InternLM2ForCausalLM):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -456,4 +456,4 @@ class InternLM2ForRewardModel(InternLM2ForCausalLM):
)
hidden_states = hidden_states.to(self.head_dtype)
logits = self.v_head(hidden_states)
return logits
return logits
\ No newline at end of file
......@@ -101,7 +101,7 @@ class InternLM2VEModel(InternLM2Model):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -136,4 +136,4 @@ class InternLM2VEForCausalLM(InternLM2ForCausalLM):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__(
vllm_config=vllm_config, prefix=prefix, model_type=InternLM2VEModel
)
)
\ No newline at end of file
......@@ -782,7 +782,7 @@ class InternS1ForConditionalGeneration(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -819,4 +819,4 @@ class InternS1ForConditionalGeneration(
language_model="language_model",
connector="multi_modal_projector",
tower_model="vision_tower",
)
)
\ No newline at end of file
......@@ -1371,7 +1371,7 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA)
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -1442,4 +1442,4 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA)
return 0
num_patches = num_vision_tokens // (self.patch_tokens + 1)
return num_patches * self.num_image_token
return num_patches * self.num_image_token
\ No newline at end of file
......@@ -438,7 +438,7 @@ class IQuestLoopCoderModel(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -570,7 +570,7 @@ class IQuestLoopCoderForCausalLM(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -592,4 +592,4 @@ class IQuestLoopCoderForCausalLM(nn.Module):
self,
skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
)
return loader.load_weights(weights)
return loader.load_weights(weights)
\ No newline at end of file
......@@ -1450,7 +1450,7 @@ class IsaacForConditionalGeneration(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -1479,4 +1479,4 @@ class IsaacForConditionalGeneration(
language_model="language_model",
connector="vision_embedding.linear_fc2", # The final linear layer
tower_model="vision_embedding",
)
)
\ No newline at end of file
......@@ -280,7 +280,7 @@ class JAISModel(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
position_ids: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -344,7 +344,7 @@ class JAISLMHeadModel(nn.Module, SupportsPP):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -394,4 +394,4 @@ class JAISLMHeadModel(nn.Module, SupportsPP):
weight_loader = getattr(param, "weight_loader", default_weight_loader)
weight_loader(param, loaded_weight)
loaded_params.add(name)
return loaded_params
return loaded_params
\ No newline at end of file
......@@ -483,7 +483,7 @@ class Jais2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -505,4 +505,4 @@ class Jais2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
self,
skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
)
return loader.load_weights(weights)
return loader.load_weights(weights)
\ No newline at end of file
......@@ -348,7 +348,7 @@ class JambaModel(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -516,7 +516,7 @@ class JambaForCausalLM(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -602,4 +602,4 @@ class JambaForSequenceClassification(JambaForCausalLM):
pooler_config = vllm_config.model_config.pooler_config
assert pooler_config is not None
self.pooler = DispatchPooler.for_seq_cls(pooler_config, classifier=self.score)
self.pooler = DispatchPooler.for_seq_cls(pooler_config, classifier=self.score)
\ No newline at end of file
......@@ -125,7 +125,7 @@ class JinaVLForSequenceClassification(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -142,4 +142,4 @@ class JinaVLForSequenceClassification(
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
loader = AutoWeightsLoader(self)
return loader.load_weights(weights, mapper=self.weight_mapper)
return loader.load_weights(weights, mapper=self.weight_mapper)
\ No newline at end of file
......@@ -732,7 +732,7 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -755,4 +755,4 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP)
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
loader = AutoWeightsLoader(self)
return loader.load_weights(weights)
return loader.load_weights(weights)
\ No newline at end of file
......@@ -1438,7 +1438,7 @@ class BaseKeyeModule(nn.Module, SupportsMultiModal):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -1692,4 +1692,4 @@ class KeyeForConditionalGeneration(
llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1)
mrope_position_delta = (llm_positions.max() + 1 - len(input_tokens)).item()
return llm_positions, mrope_position_delta
return llm_positions, mrope_position_delta
\ No newline at end of file
......@@ -506,7 +506,7 @@ class KimiLinearForCausalLM(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -666,4 +666,4 @@ def get_spec_layer_idx_from_weight_name(
for i in range(config.num_nextn_predict_layers):
if weight_name.startswith(f"model.layers.{layer_idx + i}."):
return layer_idx + i
return None
return None
\ No newline at end of file
......@@ -389,7 +389,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -412,4 +412,4 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
loader = AutoWeightsLoader(self)
return loader.load_weights(weights)
return loader.load_weights(weights)
\ No newline at end of file
......@@ -342,7 +342,7 @@ class Lfm2Model(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -503,7 +503,7 @@ class Lfm2ForCausalLM(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -523,4 +523,4 @@ class Lfm2ForCausalLM(
self,
skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
)
return loader.load_weights(weights)
return loader.load_weights(weights)
\ No newline at end of file
......@@ -457,7 +457,7 @@ class Lfm2MoeModel(nn.Module):
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -730,7 +730,7 @@ class Lfm2MoeForCausalLM(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -753,4 +753,4 @@ class Lfm2MoeForCausalLM(
return loader.load_weights(weights)
def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
return self.model.get_expert_mapping()
return self.model.get_expert_mapping()
\ No newline at end of file
......@@ -769,7 +769,7 @@ class Lfm2VLForConditionalGeneration(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......@@ -804,4 +804,4 @@ class Lfm2VLForConditionalGeneration(
language_model="language_model",
connector="multi_modal_projector",
tower_model="vision_tower",
)
)
\ No newline at end of file
......@@ -651,7 +651,7 @@ class LlamaForCausalLM(
def forward(
self,
input_ids: torch.Tensor | None,
input_ids: torch.Tensor,
positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment