Change the scale passed to convert_fp8 in gather_cache to a fixed 1.0

b0e99e82 · zhuwenwen · 511eeccd · b0e99e82
Commit b0e99e82 authored Nov 28, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 1 deletion

vllm/_custom_ops.py (+2 -1)

No files found.
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -2187,7 +2187,8 @@ def gather_cache(src_cache: torch.Tensor,
        torch.ops._C_cache_ops.gather_cache(src_cache, dst_fp8, block_table,
                                            cu_seq_lens, batch_size, seq_starts)
        #dst_fp8->bf16     
-        convert_fp8(dst, dst_fp8, scale, kv_dtype)
+        # convert_fp8(dst, dst_fp8, scale, kv_dtype)
+        convert_fp8(dst, dst_fp8, 1.0, kv_dtype)
    else:
        torch.ops._C_cache_ops.gather_cache(src_cache, dst, block_table,
                                            cu_seq_lens, batch_size, seq_starts)