Fix GLM4_MOE launch with compressed_tensors quantized models (#8456)

25f73c6c · Minglei Zhu · GitHub · 581e7dcb · 25f73c6c
Unverified commit 25f73c6c, authored Jul 28, 2025 by Minglei Zhu; committed by GitHub on Jul 28, 2025
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 0 deletions

python/sglang/srt/models/glm4_moe.py python/sglang/srt/models/glm4_moe.py +1 -0

No files found.
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
                elif (
                    self.quant_config.get_name() == "fp8"
                    or self.quant_config.get_name() == "blockwise_int8"
+                    or self.quant_config.get_name() == "compressed_tensors"
                ):
                    suffix_list = [
                        "down_proj.weight",