Fix skip layer in get_quant_method (#12632)

7cee07a0 · Ke Bao · GitHub · bb517fe3 · 7cee07a0
Unverified commit 7cee07a0, authored Nov 04, 2025 by Ke Bao; committed by GitHub on Nov 04, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 1 deletion

python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +4 -1

No files found.
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -123,7 +123,10 @@ class CompressedTensorsConfig(QuantizationConfig):
        if should_ignore_layer(
            prefix, ignore=self.ignore, fused_mapping=self.packed_modules_mapping
        ):
-            return UnquantizedLinearMethod()
+            if isinstance(layer, LinearBase):
+                return UnquantizedLinearMethod()
+            return None
        if isinstance(layer, LinearBase):
            if CompressedTensorsConfig.DeepSeekFP8Config is not None:
                return Fp8LinearMethod(CompressedTensorsConfig.DeepSeekFP8Config)