Commit 0037b574 (unverified), authored by Wushi Dong, committed by GitHub
Browse files

[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)


Signed-off-by: Wushi Dong <dongws@meta.com>
parent f5b0846b
......@@ -2439,16 +2439,13 @@ class GPUModelRunner(
]:
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
is_first_rank = get_pp_group().is_first_rank
+ is_encoder_decoder = self.model_config.is_encoder_decoder
# _prepare_inputs may reorder the batch, so we must gather multi
# modal outputs after that to ensure the correct order
ec_connector_output = None
- if (
- self.supports_mm_inputs
- and is_first_rank
- and not self.model_config.is_encoder_decoder
- ):
+ if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder:
# Run the multimodal encoder if any.
with self.maybe_get_ec_connector_output(
scheduler_output,
......@@ -2526,10 +2523,7 @@ class GPUModelRunner(
num_input_tokens, intermediate_tensors, True
)
- if (
- self.model_config.is_encoder_decoder
- and scheduler_output.scheduled_encoder_inputs
- ):
+ if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
# Run the encoder, just like we do with other multimodal inputs.
# For an encoder-decoder model, our processing here is a bit
# simpler, because the outputs are just passed to the decoder.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment