"...text-generation-inference.git" did not exist on "1e3ec3c91f140c5f0dddad95a9bcb843d67b382b"
Unverified commit abb67815, authored by Yuxuan Zhang, committed by GitHub
Browse files

Update GLM-4.5 Model Doc (#11017)

parent 07440f5f
...@@ -39,7 +39,7 @@ def parse_arguments(json_value): ...@@ -39,7 +39,7 @@ def parse_arguments(json_value):
class Glm4MoeDetector(BaseFormatDetector): class Glm4MoeDetector(BaseFormatDetector):
""" """
Detector for GLM-4.5 models. Detector for GLM-4.5 and GLM-4.6 models.
Assumes function call format: Assumes function call format:
<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call> <tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>
""" """
...@@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector): ...@@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector):
self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>" self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
def has_tool_call(self, text: str) -> bool: def has_tool_call(self, text: str) -> bool:
"""Check if the text contains a glm-4.5 format tool call.""" """Check if the text contains a glm-4.5 / glm-4.6 format tool call."""
return self.bot_token in text return self.bot_token in text
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult: def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
...@@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector): ...@@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector):
self, new_text: str, tools: List[Tool] self, new_text: str, tools: List[Tool]
) -> StreamingParseResult: ) -> StreamingParseResult:
""" """
Streaming incremental parsing tool calls for GLM-4.5 format. Streaming incremental parsing tool calls for GLM-4.5 and GLM-4.6 format.
""" """
self._buffer += new_text self._buffer += new_text
current_text = self._buffer current_text = self._buffer
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Inference-only GLM-4.5 model compatible with HuggingFace weights""" """Inference-only GLM-4.5, GLM-4.6 model compatible with HuggingFace weights"""
import logging import logging
from typing import Any, Dict, Iterable, Optional, Tuple from typing import Any, Dict, Iterable, Optional, Tuple
...@@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM): ...@@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
or self.config.architectures[0] != architecture or self.config.architectures[0] != architecture
or self.config.n_shared_experts != 1 or self.config.n_shared_experts != 1
): ):
disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization." disable_reason = "Only GLM-4.5 or GLM-4.6 on NV-platform with capability >= 80 can use shared experts fusion optimization."
elif get_moe_expert_parallel_world_size() > 1: elif get_moe_expert_parallel_world_size() > 1:
disable_reason = "Deepseek and GLM-4.5 can not use shared experts fusion optimization under expert parallelism." disable_reason = "Deepseek and GLM-4.5 or GLM-4.6 can not use shared experts fusion optimization under expert parallelism."
if disable_reason is not None: if disable_reason is not None:
global_server_args_dict["disable_shared_experts_fusion"] = True global_server_args_dict["disable_shared_experts_fusion"] = True
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Inference-only GLM-4.5 NextN Speculative Decoding.""" """Inference-only GLM-4.5, GLM-4.6 NextN Speculative Decoding."""
import logging import logging
from typing import Iterable, Optional, Tuple from typing import Iterable, Optional, Tuple
...@@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module): ...@@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module):
super().__init__() super().__init__()
if quant_config is not None and quant_config.get_name() == "modelopt_fp4": if quant_config is not None and quant_config.get_name() == "modelopt_fp4":
logger.warning( logger.warning(
"Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 model." "Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 / GLM-4.6 model."
) )
quant_config = None quant_config = None
......
...@@ -325,7 +325,7 @@ classDiagram ...@@ -325,7 +325,7 @@ classDiagram
- `qwen3`: Qwen3 base model (initial_in_reasoning=false) - `qwen3`: Qwen3 base model (initial_in_reasoning=false)
- `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true) - `qwen3_thinking`: Qwen3 thinking variant (initial_in_reasoning=true)
- `kimi`: Kimi with Unicode tokens - `kimi`: Kimi with Unicode tokens
- `glm45`: GLM-4.5 parser - `glm45`: GLM-4.5 / GLM-4.6 parser
- `step3`: Step3 parser - `step3`: Step3 parser
- `passthrough`: No-op fallback parser - `passthrough`: No-op fallback parser
......
...@@ -180,10 +180,9 @@ impl ParserRegistry { ...@@ -180,10 +180,9 @@ impl ParserRegistry {
self.map_model("deepseek-*", "pythonic"); self.map_model("deepseek-*", "pythonic");
// GLM models // GLM models
// GLM-4 MoE uses XML-style format // GLM-4.5 and GLM-4.6 use XML-style format
self.map_model("glm-4-moe*", "glm4_moe");
self.map_model("THUDM/glm-4-moe*", "glm4_moe");
self.map_model("glm-4.5*", "glm4_moe"); self.map_model("glm-4.5*", "glm4_moe");
self.map_model("glm-4.6*", "glm4_moe");
// Other GLM models may use JSON // Other GLM models may use JSON
self.map_model("glm-*", "json"); self.map_model("glm-*", "json");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment