".github/actions/vscode:/vscode.git/clone" did not exist on "062d3e6cfc3c6b6f912db1cf05992ceea0925829"
Unverified commit 59965aff, authored by Vadim Gimpelson, committed by GitHub
Browse files

[BUGFIX] Fix `_dummy_run` missing `prepare_inputs_event` synchronization (#34866)


Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
parent b1c4f0b2
...@@ -4771,8 +4771,13 @@ class GPUModelRunner( ...@@ -4771,8 +4771,13 @@ class GPUModelRunner(
ubatch_slices=ubatch_slices_padded, ubatch_slices=ubatch_slices_padded,
) )
# If force_attention is True, we always capture attention. Otherwise,
# it only happens for cudagraph_runtime_mode=FULL.
# _dummy_run shares pinned CPU buffers (seq_lens, query_start_loc,
# etc.) with execute_model. It must participate in the same event
# protocol so that back-to-back dummy/real steps don't overwrite
# pinned memory while a prior non_blocking H2D DMA is still reading.
with self.synchronize_input_prep():
# If force_attention is True, we always capture attention.
# Otherwise, it only happens for cudagraph_runtime_mode=FULL.
if force_attention or cudagraph_runtime_mode == CUDAGraphMode.FULL: if force_attention or cudagraph_runtime_mode == CUDAGraphMode.FULL:
if create_mixed_batch: if create_mixed_batch:
# In the mixed batch mode (used for FI warmup), we use # In the mixed batch mode (used for FI warmup), we use
...@@ -4795,7 +4800,7 @@ class GPUModelRunner( ...@@ -4795,7 +4800,7 @@ class GPUModelRunner(
num_tokens_padded=num_tokens_padded if pad_attn else None, num_tokens_padded=num_tokens_padded if pad_attn else None,
num_reqs=num_reqs_padded, num_reqs=num_reqs_padded,
max_query_len=max_query_len, max_query_len=max_query_len,
ubatch_slices=ubatch_slices_padded if pad_attn else ubatch_slices, ubatch_slices=(ubatch_slices_padded if pad_attn else ubatch_slices),
for_cudagraph_capture=is_graph_capturing, for_cudagraph_capture=is_graph_capturing,
slot_mappings=slot_mappings_by_group, slot_mappings=slot_mappings_by_group,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment