Unverified commit 2f4e108f, authored by Alphi and committed by GitHub
Browse files

[Bugfix] Clean up MiniCPM-V (#6939)


Co-authored-by: hezhihui <hzh7269@modelbest.cn>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent 6512937d
...@@ -222,9 +222,13 @@ Vision Language Models ...@@ -222,9 +222,13 @@ Vision Language Models
- -
* - :code:`MiniCPM-V` * - :code:`MiniCPM-V`
- MiniCPM-V - MiniCPM-V
- :code:`openbmb/MiniCPM-V-2`, :code:`openbmb/MiniCPM-Llama3-V-2_5`, etc. - :code:`openbmb/MiniCPM-V-2` (see note), :code:`openbmb/MiniCPM-Llama3-V-2_5`, etc.
- -
.. note::
For :code:`openbmb/MiniCPM-V-2`, the official repo doesn't work yet, so we need to use a fork (:code:`HwwwH/MiniCPM-V-2`) for now.
For more details, please see: https://github.com/vllm-project/vllm/pull/4087#issuecomment-2250397630
---- ----
If your model uses one of the above model architectures, you can seamlessly run your model with vLLM. If your model uses one of the above model architectures, you can seamlessly run your model with vLLM.
......
...@@ -418,11 +418,9 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA): ...@@ -418,11 +418,9 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA):
kv_caches: List[torch.Tensor], kv_caches: List[torch.Tensor],
attn_metadata: AttentionMetadata, attn_metadata: AttentionMetadata,
intermediate_tensors: Optional[IntermediateTensors] = None, intermediate_tensors: Optional[IntermediateTensors] = None,
input_embeds: Optional[torch.Tensor] = None
) -> Union[torch.Tensor, IntermediateTensors]: ) -> Union[torch.Tensor, IntermediateTensors]:
model_output = self.model(input_ids, positions, kv_caches, model_output = self.model(input_ids, positions, kv_caches,
attn_metadata, intermediate_tensors, attn_metadata, intermediate_tensors)
input_embeds)
return model_output return model_output
def compute_logits(self, hidden_states: torch.Tensor, def compute_logits(self, hidden_states: torch.Tensor,
......
...@@ -370,6 +370,7 @@ class MiniCPMModel(nn.Module): ...@@ -370,6 +370,7 @@ class MiniCPMModel(nn.Module):
positions: torch.Tensor, positions: torch.Tensor,
kv_caches: List[torch.Tensor], kv_caches: List[torch.Tensor],
attn_metadata: AttentionMetadata, attn_metadata: AttentionMetadata,
intermediate_tensors: Optional[IntermediateTensors] = None,
inputs_embeds: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
if inputs_embeds is not None: if inputs_embeds is not None:
...@@ -463,11 +464,10 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA): ...@@ -463,11 +464,10 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA):
positions: torch.Tensor, positions: torch.Tensor,
kv_caches: List[torch.Tensor], kv_caches: List[torch.Tensor],
attn_metadata: AttentionMetadata, attn_metadata: AttentionMetadata,
input_embeds: Optional[torch.Tensor] = None,
intermediate_tensors: Optional[IntermediateTensors] = None, intermediate_tensors: Optional[IntermediateTensors] = None,
) -> torch.Tensor: ) -> torch.Tensor:
hidden_states = self.model(input_ids, positions, kv_caches, hidden_states = self.model(input_ids, positions, kv_caches,
attn_metadata, input_embeds) attn_metadata, intermediate_tensors)
return hidden_states return hidden_states
def compute_logits(self, hidden_states: torch.Tensor, def compute_logits(self, hidden_states: torch.Tensor,
......
This diff is collapsed.
This diff is collapsed.
...@@ -342,7 +342,7 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA): ...@@ -342,7 +342,7 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA):
intermediate_tensors: Optional[IntermediateTensors] = None, intermediate_tensors: Optional[IntermediateTensors] = None,
) -> torch.Tensor: ) -> torch.Tensor:
hidden_states = self.model(input_ids, positions, kv_caches, hidden_states = self.model(input_ids, positions, kv_caches,
attn_metadata) attn_metadata, intermediate_tensors)
return hidden_states return hidden_states
def compute_logits(self, hidden_states: torch.Tensor, def compute_logits(self, hidden_states: torch.Tensor,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment