Unverified commit 85bda9e7, authored by Yuxuan Zhang, committed by GitHub
Browse files

Remove incorrect GLM-4.5 quantization code (#21435)

parent 610852a4
......@@ -20,7 +20,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
logger = init_logger(__name__)
@ToolParserManager.register_module("glm4_moe")
@ToolParserManager.register_module("glm45")
class Glm4MoeModelToolParser(ToolParser):
def __init__(self, tokenizer: AnyTokenizer):
......
......@@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module):
self.embed_tokens = VocabParallelEmbedding(
config.vocab_size,
config.hidden_size,
quant_config=quant_config,
prefix=f"{prefix}.embed_tokens")
else:
self.embed_tokens = PPMissingLayer()
......
......@@ -14,7 +14,7 @@ from vllm.reasoning import ReasoningParser, ReasoningParserManager
logger = init_logger(__name__)
@ReasoningParserManager.register_module("glm4_moe")
@ReasoningParserManager.register_module("glm45")
class Glm4MoeModelReasoningParser(ReasoningParser):
"""
Reasoning parser for the Glm4MoeModel model.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment