Commit 369f47aa (unverified), authored by Matthew Bonanni, committed by GitHub
Browse files

[DeepSeek v3.2] Remove unnecessary syncwarps (#31047)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent dabff12e
...@@ -451,9 +451,6 @@ __global__ void indexer_k_quant_and_cache_kernel( ...@@ -451,9 +451,6 @@ __global__ void indexer_k_quant_and_cache_kernel(
for (int i = 0; i < VEC_SIZE; i++) { for (int i = 0; i < VEC_SIZE; i++) {
amax = fmaxf(amax, fabsf(float(k_val_ptr[i]))); amax = fmaxf(amax, fabsf(float(k_val_ptr[i])));
} }
#ifndef USE_ROCM
__syncwarp();
#endif
// Reduced amax // Reduced amax
for (int mask = 16; mask > 0; mask /= 2) { for (int mask = 16; mask > 0; mask /= 2) {
...@@ -463,9 +460,7 @@ __global__ void indexer_k_quant_and_cache_kernel( ...@@ -463,9 +460,7 @@ __global__ void indexer_k_quant_and_cache_kernel(
amax = fmaxf(amax, __shfl_xor_sync(unsigned(-1), amax, mask)); amax = fmaxf(amax, __shfl_xor_sync(unsigned(-1), amax, mask));
#endif #endif
} }
#ifndef USE_ROCM
__syncwarp();
#endif
#if defined(__gfx942__) #if defined(__gfx942__)
float scale = fmaxf(amax, 1e-4) / 224.0f; float scale = fmaxf(amax, 1e-4) / 224.0f;
#else #else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment