Commit 2b90ecd3 authored by zhuwenwen
Browse files

remove fp8_e5m2 error

parent 90227352
......@@ -123,9 +123,9 @@ class Attention(nn.Module):
assert isinstance(quant_method, BaseKVCacheMethod)
# TODO (mgoin): kv cache dtype should be specified in the FP8
# checkpoint config and become the "auto" behavior
- if self.kv_cache_dtype == "fp8_e5m2":
- raise ValueError("fp8_e5m2 kv-cache is not supported with "
- "fp8 checkpoints.")
+ # if self.kv_cache_dtype == "fp8_e5m2":
+ # raise ValueError("fp8_e5m2 kv-cache is not supported with "
+ # "fp8 checkpoints.")
# If quantization is enabled, we make "k_scale" and "v_scale"
# parameters so that it can be loaded from the model checkpoint.
# The k/v_scale will then be converted back to native float32
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment