Unverified commit 72031173, authored by Jiaqi, committed by GitHub
Browse files

fix: fix typo of comments in w8a8_fp8.py (#4843)

parent 9fdc6d6a
......@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
Note:
- For models without offline quantization, weights will be quantized during model loading
- If CUTLASS is supported: Per-channel weight quantization is used
- - If CUTLASS is not supported: Falls back to per-token weight quantization
+ - If CUTLASS is not supported: Falls back to per-tensor weight quantization
"""
def __init__(self, is_checkpoint_fp8_serialized: bool = False):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment