Unverified commit 378b3ef6, authored by Sage Moore, committed by GitHub
Browse files

[ROCm][V1] Update reshape_and_cache to properly work with CUDA graph padding (#13922)

parent c9944acb
...@@ -375,7 +375,7 @@ void reshape_and_cache( ...@@ -375,7 +375,7 @@ void reshape_and_cache(
torch::Tensor& slot_mapping, // [num_tokens] torch::Tensor& slot_mapping, // [num_tokens]
const std::string& kv_cache_dtype, torch::Tensor& k_scale, const std::string& kv_cache_dtype, torch::Tensor& k_scale,
torch::Tensor& v_scale) { torch::Tensor& v_scale) {
int num_tokens = key.size(0); int num_tokens = slot_mapping.size(0);
int num_heads = key.size(1); int num_heads = key.size(1);
int head_size = key.size(2); int head_size = key.size(2);
int block_size = key_cache.size(3); int block_size = key_cache.size(3);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment