Unverified commit 25f73c6c, authored by Minglei Zhu and committed by GitHub
Browse files

Fix GLM4_MOE launch with compressed_tensors quantized models (#8456)

parent 581e7dcb
@@ -795,6 +795,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
elif (
self.quant_config.get_name() == "fp8"
or self.quant_config.get_name() == "blockwise_int8"
or self.quant_config.get_name() == "compressed_tensors"
):
suffix_list = [
"down_proj.weight",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment