Fix block-wise fp8 torch.compile (#3232)

c02e3139 · Ke Bao · GitHub · 734daedd · c02e3139
Unverified Commit c02e3139 authored Jan 31, 2025 by Ke Bao Committed by GitHub Jan 31, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

python/sglang/srt/layers/quantization/fp8.py python/sglang/srt/layers/quantization/fp8.py +7 -0

No files found.
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -290,6 +290,13 @@ class Fp8LinearMethod(LinearMethodBase):
                    weight_scale, requires_grad=False
                )
                layer.input_scale = None
+            else:
+                layer.weight = torch.nn.Parameter(
+                    layer.weight.data, requires_grad=False
+                )
+                layer.weight_scale_inv = torch.nn.Parameter(
+                    layer.weight_scale_inv.data, requires_grad=False
+                )
            return
        layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)
        # If checkpoint not serialized fp8, quantize the weights.