Unverified commit ddbb0d23, authored by Nick Hill and committed by GitHub
Browse files

[Model Runner V2] Fix mm input embeddings lookup (#36588)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent 9efc3bdc
......@@ -98,8 +98,11 @@ class DefaultModelState(ModelState):
req_states.prefill_len.np[input_batch.idx_mapping_np],
req_states.num_computed_prefill_tokens[input_batch.idx_mapping_np],
)
+ # Use unpadded input_ids to match is_mm_embed size (num_tokens).
+ # input_batch.input_ids may be padded for CUDA graphs.
+ input_ids_unpadded = input_batch.input_ids[: input_batch.num_tokens]
inputs_embeds = self.encoder_runner.get_inputs_embeds(
- input_batch.input_ids, mm_embeds, is_mm_embed
+ input_ids_unpadded, mm_embeds, is_mm_embed
)
return inputs_embeds[: input_batch.num_tokens_after_padding]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment