Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
abb67815
Unverified
Commit
abb67815
authored
Sep 29, 2025
by
Yuxuan Zhang
Committed by
GitHub
Sep 28, 2025
Browse files
Update GLM-4.5 Model Doc (#11017)
parent
07440f5f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
11 additions
and
12 deletions
+11
-12
python/sglang/srt/function_call/glm4_moe_detector.py
python/sglang/srt/function_call/glm4_moe_detector.py
+3
-3
python/sglang/srt/models/glm4_moe.py
python/sglang/srt/models/glm4_moe.py
+3
-3
python/sglang/srt/models/glm4_moe_nextn.py
python/sglang/srt/models/glm4_moe_nextn.py
+2
-2
sgl-router/src/reasoning_parser/README.md
sgl-router/src/reasoning_parser/README.md
+1
-1
sgl-router/src/tool_parser/registry.rs
sgl-router/src/tool_parser/registry.rs
+2
-3
No files found.
python/sglang/srt/function_call/glm4_moe_detector.py
View file @
abb67815
...
...
@@ -39,7 +39,7 @@ def parse_arguments(json_value):
class
Glm4MoeDetector
(
BaseFormatDetector
):
"""
Detector for GLM-4.5 models.
Detector for GLM-4.5
and GLM-4.6
models.
Assumes function call format:
<tool_call>get_weather
\n
<arg_key>city</arg_key>
\n
<arg_value>北京</arg_value>
\n
<arg_key>date</arg_key>
\n
<arg_value>2024-06-27</arg_value>
\n
</tool_call>
\n
<tool_call>get_weather
\n
<arg_key>city</arg_key>
\n
<arg_value>上海</arg_value>
\n
<arg_key>date</arg_key>
\n
<arg_value>2024-06-27</arg_value>
\n
</tool_call>
"""
...
...
@@ -53,7 +53,7 @@ class Glm4MoeDetector(BaseFormatDetector):
self
.
func_arg_regex
=
r
"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
def
has_tool_call
(
self
,
text
:
str
)
->
bool
:
"""Check if the text contains a glm-4.5 format tool call."""
"""Check if the text contains a glm-4.5
/ glm-4.6
format tool call."""
return
self
.
bot_token
in
text
def
detect_and_parse
(
self
,
text
:
str
,
tools
:
List
[
Tool
])
->
StreamingParseResult
:
...
...
@@ -102,7 +102,7 @@ class Glm4MoeDetector(BaseFormatDetector):
self
,
new_text
:
str
,
tools
:
List
[
Tool
]
)
->
StreamingParseResult
:
"""
Streaming incremental parsing tool calls for GLM-4.5 format.
Streaming incremental parsing tool calls for GLM-4.5
and GLM-4.6
format.
"""
self
.
_buffer
+=
new_text
current_text
=
self
.
_buffer
...
...
python/sglang/srt/models/glm4_moe.py
View file @
abb67815
...
...
@@ -12,7 +12,7 @@
# limitations under the License.
# ==============================================================================
"""Inference-only GLM-4.5 model compatible with HuggingFace weights"""
"""Inference-only GLM-4.5
, GLM-4.6
model compatible with HuggingFace weights"""
import
logging
from
typing
import
Any
,
Dict
,
Iterable
,
Optional
,
Tuple
...
...
@@ -785,9 +785,9 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
or
self
.
config
.
architectures
[
0
]
!=
architecture
or
self
.
config
.
n_shared_experts
!=
1
):
disable_reason
=
"Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization."
disable_reason
=
"Only GLM-4.5
or GLM-4.6
on NV-platform with capability >= 80 can use shared experts fusion optimization."
elif
get_moe_expert_parallel_world_size
()
>
1
:
disable_reason
=
"Deepseek and GLM-4.5 can not use shared experts fusion optimization under expert parallelism."
disable_reason
=
"Deepseek and GLM-4.5
or GLM-4.6
can not use shared experts fusion optimization under expert parallelism."
if
disable_reason
is
not
None
:
global_server_args_dict
[
"disable_shared_experts_fusion"
]
=
True
...
...
python/sglang/srt/models/glm4_moe_nextn.py
View file @
abb67815
...
...
@@ -12,7 +12,7 @@
# limitations under the License.
# ==============================================================================
"""Inference-only GLM-4.5 NextN Speculative Decoding."""
"""Inference-only GLM-4.5
, GLM-4.6
NextN Speculative Decoding."""
import
logging
from
typing
import
Iterable
,
Optional
,
Tuple
...
...
@@ -48,7 +48,7 @@ class Glm4MoeModelNextN(nn.Module):
super
().
__init__
()
if
quant_config
is
not
None
and
quant_config
.
get_name
()
==
"modelopt_fp4"
:
logger
.
warning
(
"Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 model."
"Overriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5
/ GLM-4.6
model."
)
quant_config
=
None
...
...
sgl-router/src/reasoning_parser/README.md
View file @
abb67815
...
...
@@ -325,7 +325,7 @@ classDiagram
-
`qwen3`
: Qwen3 base model (initial_in_reasoning=false)
-
`qwen3_thinking`
: Qwen3 thinking variant (initial_in_reasoning=true)
-
`kimi`
: Kimi with Unicode tokens
-
`glm45`
: GLM-4.5 parser
-
`glm45`
: GLM-4.5
/ GLM-4.6
parser
-
`step3`
: Step3 parser
-
`passthrough`
: No-op fallback parser
...
...
sgl-router/src/tool_parser/registry.rs
View file @
abb67815
...
...
@@ -180,10 +180,9 @@ impl ParserRegistry {
self
.map_model
(
"deepseek-*"
,
"pythonic"
);
// GLM models
// GLM-4 MoE uses XML-style format
self
.map_model
(
"glm-4-moe*"
,
"glm4_moe"
);
self
.map_model
(
"THUDM/glm-4-moe*"
,
"glm4_moe"
);
// GLM-4.5 and GLM-4.6 use XML-style format
self
.map_model
(
"glm-4.5*"
,
"glm4_moe"
);
self
.map_model
(
"glm-4.6*"
,
"glm4_moe"
);
// Other GLM models may use JSON
self
.map_model
(
"glm-*"
,
"json"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment