fix: fix typo of comments in w8a8_fp8.py (#4843)

72031173 · Jiaqi · GitHub · 9fdc6d6a · 72031173
Unverified commit 72031173, authored Mar 28, 2025 by Jiaqi; committed by GitHub Mar 27, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

python/sglang/srt/layers/quantization/w8a8_fp8.py → python/sglang/srt/layers/quantization/w8a8_fp8.py (+1 -1)

No files found.
--- a/python/sglang/srt/layers/quantization/w8a8_fp8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_fp8.py
@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
    Note:
    - For models without offline quantization, weights will be quantized during model loading
    - If CUTLASS is supported: Per-channel weight quantization is used
-    - If CUTLASS is not supported: Falls back to per-token weight quantization
+    - If CUTLASS is not supported: Falls back to per-tensor weight quantization
    """
    def __init__(self, is_checkpoint_fp8_serialized: bool = False):