Commit b0e99e82 authored by zhuwenwen
Browse files

Change the scale passed to convert_fp8 in gather_cache to 1.0

parent 511eeccd
...@@ -2187,7 +2187,8 @@ def gather_cache(src_cache: torch.Tensor, ...@@ -2187,7 +2187,8 @@ def gather_cache(src_cache: torch.Tensor,
torch.ops._C_cache_ops.gather_cache(src_cache, dst_fp8, block_table, torch.ops._C_cache_ops.gather_cache(src_cache, dst_fp8, block_table,
cu_seq_lens, batch_size, seq_starts) cu_seq_lens, batch_size, seq_starts)
#dst_fp8->bf16 #dst_fp8->bf16
convert_fp8(dst, dst_fp8, scale, kv_dtype) # convert_fp8(dst, dst_fp8, scale, kv_dtype)
convert_fp8(dst, dst_fp8, 1.0, kv_dtype)
else: else:
torch.ops._C_cache_ops.gather_cache(src_cache, dst, block_table, torch.ops._C_cache_ops.gather_cache(src_cache, dst, block_table,
cu_seq_lens, batch_size, seq_starts) cu_seq_lens, batch_size, seq_starts)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment