Revert "[ModelOpt] Fix Weight Loading for DSR1-FP4 Quantization (#9712)" (#10176)

45b3a6a2 · Yineng Zhang · GitHub · 9a18aa54 · 45b3a6a2 · 45b3a6a2
Unverified Commit 45b3a6a2 authored Sep 08, 2025 by Yineng Zhang Committed by GitHub Sep 08, 2025
Showing 2 changed files with 2 additions and 10 deletions

python/sglang/srt/layers/linear.py +2 -3

python/sglang/srt/layers/quantization/modelopt_quant.py +0 -7

No files found.
--- a/python/sglang/srt/layers/linear.py
+++ b/python/sglang/srt/layers/linear.py
@@ -235,9 +235,8 @@ class ReplicatedLinear(LinearBase):
                    loaded_weight = loaded_weight[:1]
                else:
                    raise ValueError(f"{loaded_weight} are not all equal")
-        assert (
-            param.size() == loaded_weight.size()
+        assert param.size() == loaded_weight.size()
-        ), f"Loading weight error:  param: {param.size()}, loaded_weight: {loaded_weight.size()}"
        param.data.copy_(loaded_weight)
    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:

--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -646,13 +646,6 @@ class ModelOptFp4Config(QuantizationConfig):
            regex_str = pattern.replace(".", r"\.").replace("*", r".*")
            if re.fullmatch(regex_str, prefix):
                return True
-            # Check if the last part of the excluded pattern is contained in the last part of the prefix
-            # This handles fused modules like fused_qkv_a_proj_with_mqa that contain q_a_proj and kv_a_proj_with_mqa
-            pattern_last_part = pattern.split(".")[-1]
-            prefix_last_part = prefix.split(".")[-1]
-            if pattern_last_part in prefix_last_part:
-                return True
        return False
    def get_quant_method(