"vscode:/vscode.git/clone" did not exist on "4b612664fdfb4e87af6684403872d83ac04fa496"
Unverified commit 369f47aa, authored by Matthew Bonanni, committed by GitHub
Browse files

[DeepSeek v3.2] Remove unnecessary syncwarps (#31047)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent dabff12e
......@@ -451,9 +451,6 @@ __global__ void indexer_k_quant_and_cache_kernel(
for (int i = 0; i < VEC_SIZE; i++) {
amax = fmaxf(amax, fabsf(float(k_val_ptr[i])));
}
#ifndef USE_ROCM
__syncwarp();
#endif
// Reduced amax
for (int mask = 16; mask > 0; mask /= 2) {
......@@ -463,9 +460,7 @@ __global__ void indexer_k_quant_and_cache_kernel(
amax = fmaxf(amax, __shfl_xor_sync(unsigned(-1), amax, mask));
#endif
}
#ifndef USE_ROCM
__syncwarp();
#endif
#if defined(__gfx942__)
float scale = fmaxf(amax, 1e-4) / 224.0f;
#else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment