Unverified commit c2c661af, authored by Roger Wang, committed by GitHub
Browse files

[Bugfix] Fix overallocation in MM profiling (#29386)


Signed-off-by: Roger Wang <hey@rogerw.io>
parent 798e87db
...@@ -4245,14 +4245,18 @@ class GPUModelRunner( ...@@ -4245,14 +4245,18 @@ class GPUModelRunner(
# NOTE: This happens when encoder cache needs to store # NOTE: This happens when encoder cache needs to store
# the embeddings that encoder outputs are scattered onto. # the embeddings that encoder outputs are scattered onto.
# In this case we create dummy embeddings of size # In this case we create dummy embeddings of size
# (encode_budget, hidden_size) and scatter encoder # (max_tokens_for_modality, hidden_size) and scatter
# output into it. # encoder output into it.
encoder_output_shape = dummy_encoder_outputs[0].shape encoder_output_shape = dummy_encoder_outputs[0].shape
if encoder_output_shape[0] < encoder_budget: max_mm_tokens_per_item = mm_budget.max_tokens_by_modality[
dummy_modality
]
if encoder_output_shape[0] < max_mm_tokens_per_item:
encoder_hidden_size = encoder_output_shape[-1]
expanded_outputs = [] expanded_outputs = []
for output in dummy_encoder_outputs: for output in dummy_encoder_outputs:
expanded = output.new_zeros( expanded = output.new_zeros(
(encoder_budget, encoder_output_shape[-1]) (max_mm_tokens_per_item, encoder_hidden_size)
) )
num_tokens = output.shape[0] num_tokens = output.shape[0]
expanded[:num_tokens].copy_(output) expanded[:num_tokens].copy_(output)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment