".github/actions/vscode:/vscode.git/clone" did not exist on "062d3e6cfc3c6b6f912db1cf05992ceea0925829"
Commit 653b799b authored by zhuwenwen's avatar zhuwenwen
Browse files

remove fp8_e5m2 error

parent 1851782d
...@@ -170,9 +170,9 @@ class Attention(nn.Module, AttentionLayerBase): ...@@ -170,9 +170,9 @@ class Attention(nn.Module, AttentionLayerBase):
assert isinstance(quant_method, BaseKVCacheMethod) assert isinstance(quant_method, BaseKVCacheMethod)
# TODO (mgoin): kv cache dtype should be specified in the FP8 # TODO (mgoin): kv cache dtype should be specified in the FP8
# checkpoint config and become the "auto" behavior # checkpoint config and become the "auto" behavior
if self.kv_cache_dtype == "fp8_e5m2": # if self.kv_cache_dtype == "fp8_e5m2":
raise ValueError("fp8_e5m2 kv-cache is not supported with " # raise ValueError("fp8_e5m2 kv-cache is not supported with "
"fp8 checkpoints.") # "fp8 checkpoints.")
# If quantization is enabled, we make "k_scale" and "v_scale" # If quantization is enabled, we make "k_scale" and "v_scale"
# parameters so that it can be loaded from the model checkpoint. # parameters so that it can be loaded from the model checkpoint.
# The k/v_scale will then be converted back to native float32 # The k/v_scale will then be converted back to native float32
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment