Unverified commit ba2dfbb0, authored by Roger Wang, committed by GitHub
Browse files

[Misc] Make MM embedding merge interface explicit in model runner (#21147)


Signed-off-by: Roger Wang <hey@rogerw.me>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 1bf65138
......@@ -1328,11 +1328,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# embeddings), we always use embeddings (rather than token ids)
# as input to the multimodal model, even when the input is text.
input_ids = self.input_ids[:num_scheduled_tokens]
if mm_embeds:
inputs_embeds = self.model.get_input_embeddings(
input_ids, mm_embeds)
else:
inputs_embeds = self.model.get_input_embeddings(input_ids)
input_ids=input_ids,
multimodal_embeddings=mm_embeds or None,
)
# TODO(woosuk): Avoid the copy. Optimize.
self.inputs_embeds[:num_scheduled_tokens].copy_(inputs_embeds)
inputs_embeds = self.inputs_embeds[:num_input_tokens]
......
......@@ -937,11 +937,10 @@ class TPUModelRunner(LoRAModelRunnerMixin):
# NOTE(woosuk): To unify token ids and soft tokens (vision
# embeddings), we always use embeddings (rather than token ids)
# as input to the multimodal model, even when the input is text.
if mm_embeds:
inputs_embeds = self.model.get_input_embeddings(
input_ids, mm_embeds)
else:
inputs_embeds = self.model.get_input_embeddings(input_ids)
input_ids=input_ids,
multimodal_embeddings=mm_embeds,
)
return None, inputs_embeds
else:
# For text-only models, we use token ids as input.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment