"vscode:/vscode.git/clone" did not exist on "0e98964e94385c914783842eac4b448236018689"
Unverified commit 81e217fe, authored by Lucas Wilkinson, committed by GitHub
Browse files

[Bugfix] Fix DP Attention Padding in Dummy Run (#34187)


Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Benjamin Chislett <bchislett@nvidia.com>
parent ab97bcf6
@@ -4787,6 +4787,7 @@ class GPUModelRunner(
 pad_attn = cudagraph_runtime_mode == CUDAGraphMode.FULL
 attn_metadata, _ = self._build_attention_metadata(
 num_tokens=num_tokens_unpadded,
+num_tokens_padded=num_tokens_padded if pad_attn else None,
 num_reqs=num_reqs_padded,
 max_query_len=max_query_len,
 ubatch_slices=ubatch_slices_padded if pad_attn else ubatch_slices,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment