Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
14c18d25
Unverified
Commit
14c18d25
authored
Jun 10, 2025
by
Yudi Xue
Committed by
GitHub
Jun 10, 2025
Browse files
Frontend language separate reasoning support (#6031)
parent
90bd3e32
Changes
9
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
822 additions
and
1 deletion
+822
-1
docs/frontend/frontend_reasoning.ipynb
docs/frontend/frontend_reasoning.ipynb
+480
-0
docs/index.rst
docs/index.rst
+1
-0
python/sglang/__init__.py
python/sglang/__init__.py
+2
-0
python/sglang/api.py
python/sglang/api.py
+7
-0
python/sglang/lang/interpreter.py
python/sglang/lang/interpreter.py
+40
-1
python/sglang/lang/ir.py
python/sglang/lang/ir.py
+27
-0
test/lang/run_suite.py
test/lang/run_suite.py
+2
-0
test/lang/test_separate_reasoning.py
test/lang/test_separate_reasoning.py
+68
-0
test/lang/test_separate_reasoning_execution.py
test/lang/test_separate_reasoning_execution.py
+195
-0
No files found.
docs/frontend/frontend_reasoning.ipynb
0 → 100644
View file @
14c18d25
This diff is collapsed.
Click to expand it.
docs/index.rst
View file @
14c18d25
...
...
@@ -68,6 +68,7 @@ The core features include:
:caption: Frontend Tutorial
frontend/frontend.ipynb
frontend/frontend_reasoning.ipynb
frontend/choices_methods.md
.. toctree::
...
...
python/sglang/__init__.py
View file @
14c18d25
...
...
@@ -15,6 +15,7 @@ from sglang.api import (
get_server_info
,
image
,
select
,
separate_reasoning
,
set_default_backend
,
system
,
system_begin
,
...
...
@@ -54,6 +55,7 @@ __all__ = [
"get_server_info"
,
"image"
,
"select"
,
"separate_reasoning"
,
"set_default_backend"
,
"system"
,
"system_begin"
,
...
...
python/sglang/api.py
View file @
14c18d25
...
...
@@ -15,6 +15,7 @@ from sglang.lang.ir import (
SglRoleBegin
,
SglRoleEnd
,
SglSelect
,
SglSeparateReasoning
,
SglVideo
,
)
...
...
@@ -277,3 +278,9 @@ def assistant_begin():
def
assistant_end
():
return
SglRoleEnd
(
"assistant"
)
def
separate_reasoning
(
expr
:
Optional
[
SglExpr
]
=
None
,
model_type
:
Optional
[
str
]
=
None
):
return
SglExprList
([
expr
,
SglSeparateReasoning
(
model_type
,
expr
=
expr
)])
python/sglang/lang/interpreter.py
View file @
14c18d25
...
...
@@ -26,6 +26,7 @@ from sglang.lang.ir import (
SglRoleBegin
,
SglRoleEnd
,
SglSelect
,
SglSeparateReasoning
,
SglVariable
,
SglVarScopeBegin
,
SglVarScopeEnd
,
...
...
@@ -472,6 +473,8 @@ class StreamExecutor:
self
.
_execute_concatenate_and_append_kv_cache
(
other
)
else
:
self
.
_execute_concatenate_and_append_text
(
other
)
elif
isinstance
(
other
,
SglSeparateReasoning
):
self
.
_execute_separate_reasoning
(
other
)
else
:
raise
ValueError
(
f
"Unknown type:
{
type
(
other
)
}
"
)
...
...
@@ -724,8 +727,44 @@ class StreamExecutor:
src_rids
=
[
state
.
stream_executor
.
sid
for
state
in
expr
.
states
]
self
.
backend
.
concatenate_and_append
(
src_rids
,
self
.
sid
)
def
_execute_separate_reasoning
(
self
,
expr
:
SglSeparateReasoning
):
if
self
.
stream
:
# separate reasoning for stream is not supported
return
if
(
self
.
cur_role
==
"assistant"
and
self
.
num_api_spec_tokens
is
not
None
and
self
.
backend
.
is_chat_model
):
# Execute the stored lazy generation calls
self
.
backend
.
role_end_generate
(
self
)
from
sglang.srt.reasoning_parser
import
ReasoningParser
reasoning_parser
=
ReasoningParser
(
expr
.
model_type
)
other
=
expr
.
expr
if
not
other
:
return
elif
isinstance
(
other
,
SglGen
)
or
isinstance
(
other
,
SglSelect
):
cur_text
=
self
.
get_var
(
other
.
name
)
reasoning
,
normal_text
=
reasoning_parser
.
parse_non_stream
(
cur_text
)
reasoning_name
=
expr
.
process_name_for_reasoning
(
other
.
name
)
self
.
set_var
(
other
.
name
,
normal_text
)
self
.
set_var
(
reasoning_name
,
reasoning
)
# the variable is ready to be used
self
.
variable_event
[
reasoning_name
].
set
()
self
.
text_
=
self
.
text_
[:
self
.
cur_role_begin_pos
]
+
normal_text
elif
isinstance
(
other
,
SglExprList
):
for
x
in
other
.
expr_list
:
self
.
_execute_separate_reasoning
(
SglSeparateReasoning
(
expr
.
model_type
,
x
)
)
def
_init_var_event
(
self
,
expr
):
if
isinstance
(
expr
,
(
SglGen
,
SglSelect
,
SglVarScopeBegin
)):
if
isinstance
(
expr
,
(
SglGen
,
SglSelect
,
SglVarScopeBegin
,
SglSeparateReasoning
)
):
self
.
variable_event
[
expr
.
name
]
=
threading
.
Event
()
if
self
.
stream
:
self
.
stream_var_event
[
expr
.
name
]
=
threading
.
Event
()
...
...
python/sglang/lang/ir.py
View file @
14c18d25
...
...
@@ -606,3 +606,30 @@ class SglCommitLazy(SglExpr):
def
__repr__
(
self
):
return
"CommitLazy()"
class
SglSeparateReasoning
(
SglExpr
):
def
__init__
(
self
,
model_type
:
str
,
expr
:
SglExpr
):
super
().
__init__
()
self
.
model_type
=
model_type
self
.
expr
=
expr
self
.
name
=
None
self
.
_process_expr
(
expr
)
def
process_name_for_reasoning
(
self
,
name
):
if
not
name
:
raise
ValueError
(
"name must be provided"
)
return
f
"
{
name
}
_reasoning_content"
def
_process_expr
(
self
,
expr
):
if
isinstance
(
expr
,
SglGen
):
self
.
name
=
self
.
process_name_for_reasoning
(
expr
.
name
)
elif
isinstance
(
expr
,
SglSelect
):
self
.
name
=
self
.
process_name_for_reasoning
(
expr
.
name
)
elif
isinstance
(
expr
,
SglExprList
):
for
x
in
expr
.
expr_list
:
self
.
_process_expr
(
x
)
def
__repr__
(
self
):
return
f
"SeparateReasoning(model_type=
{
self
.
model_type
}
, name=
{
self
.
name
}
)"
test/lang/run_suite.py
View file @
14c18d25
...
...
@@ -8,6 +8,8 @@ suites = {
TestFile
(
"test_srt_backend.py"
),
# Skip this due to some OPENAI_API_KEY issues
# "test_openai_backend.py",
TestFile
(
"test_separate_reasoning.py"
),
TestFile
(
"test_separate_reasoning_execution.py"
),
],
}
...
...
test/lang/test_separate_reasoning.py
0 → 100644
View file @
14c18d25
"""
Tests for the separate_reasoning functionality in sglang.
Usage:
python3 -m unittest test/lang/test_separate_reasoning.py
"""
import
unittest
from
sglang
import
assistant
,
gen
,
separate_reasoning
,
user
from
sglang.lang.ir
import
SglExprList
,
SglSeparateReasoning
from
sglang.test.test_utils
import
CustomTestCase
class
TestSeparateReasoning
(
CustomTestCase
):
def
test_separate_reasoning_creation
(
self
):
"""Test that SglSeparateReasoning objects are created correctly."""
# Test with valid model type and gen expression
test_gen
=
gen
(
"test"
)
expr
=
separate_reasoning
(
test_gen
,
model_type
=
"deepseek-r1"
)
self
.
assertIsInstance
(
expr
,
SglExprList
)
self
.
assertEqual
(
len
(
expr
.
expr_list
),
2
)
self
.
assertEqual
(
expr
.
expr_list
[
0
],
test_gen
)
reasoning_expr
=
expr
.
expr_list
[
1
]
self
.
assertIsInstance
(
reasoning_expr
,
SglSeparateReasoning
)
self
.
assertEqual
(
reasoning_expr
.
model_type
,
"deepseek-r1"
)
self
.
assertEqual
(
reasoning_expr
.
name
,
"test_reasoning_content"
)
# Test with another valid model type
expr
=
separate_reasoning
(
test_gen
,
model_type
=
"qwen3"
)
self
.
assertIsInstance
(
expr
,
SglExprList
)
self
.
assertEqual
(
expr
.
expr_list
[
1
].
model_type
,
"qwen3"
)
def
test_separate_reasoning_name_processing
(
self
):
"""Test that separate_reasoning correctly processes names."""
test_gen
=
gen
(
"test_var"
)
expr
=
separate_reasoning
(
test_gen
,
model_type
=
"deepseek-r1"
)
reasoning_expr
=
expr
.
expr_list
[
1
]
self
.
assertEqual
(
reasoning_expr
.
name
,
"test_var_reasoning_content"
)
# Test the process_name_for_reasoning method
self
.
assertEqual
(
reasoning_expr
.
process_name_for_reasoning
(
"another_var"
),
"another_var_reasoning_content"
,
)
def
test_separate_reasoning_repr
(
self
):
"""Test the string representation of SglSeparateReasoning."""
test_gen
=
gen
(
"test_var"
)
expr
=
separate_reasoning
(
test_gen
,
model_type
=
"deepseek-r1"
)
reasoning_expr
=
expr
.
expr_list
[
1
]
self
.
assertEqual
(
repr
(
reasoning_expr
),
"SeparateReasoning(model_type=deepseek-r1, name=test_var_reasoning_content)"
,
)
def
test_separate_reasoning_with_invalid_model_type
(
self
):
"""Test that separate_reasoning accepts any model type during creation."""
# Create with invalid model type
test_gen
=
gen
(
"test"
)
expr
=
separate_reasoning
(
test_gen
,
model_type
=
"invalid-model"
)
self
.
assertIsInstance
(
expr
,
SglExprList
)
self
.
assertEqual
(
expr
.
expr_list
[
1
].
model_type
,
"invalid-model"
)
# The actual validation happens in the ReasoningParser constructor
if
__name__
==
"__main__"
:
unittest
.
main
()
test/lang/test_separate_reasoning_execution.py
0 → 100644
View file @
14c18d25
"""
Tests for the execution of separate_reasoning functionality in sglang.
Usage:
python3 -m unittest test/lang/test_separate_reasoning_execution.py
"""
import
threading
import
time
import
unittest
from
unittest.mock
import
MagicMock
,
patch
from
sglang
import
assistant
,
gen
,
separate_reasoning
,
user
from
sglang.lang.interpreter
import
StreamExecutor
from
sglang.lang.ir
import
SglGen
,
SglSeparateReasoning
from
sglang.test.test_utils
import
CustomTestCase
# Helper function to create events that won't block program exit
def
create_daemon_event
():
event
=
threading
.
Event
()
return
event
class
MockReasoningParser
:
def
__init__
(
self
,
model_type
):
self
.
model_type
=
model_type
self
.
parse_non_stream_called
=
False
self
.
parse_stream_chunk_called
=
False
def
parse_non_stream
(
self
,
full_text
):
self
.
parse_non_stream_called
=
True
# Simulate parsing by adding a prefix to indicate reasoning
reasoning
=
f
"[REASONING from
{
self
.
model_type
}
]:
{
full_text
}
"
normal_text
=
f
"[NORMAL from
{
self
.
model_type
}
]:
{
full_text
}
"
return
reasoning
,
normal_text
def
parse_stream_chunk
(
self
,
chunk_text
):
self
.
parse_stream_chunk_called
=
True
# Simulate parsing by adding a prefix to indicate reasoning
reasoning
=
f
"[REASONING from
{
self
.
model_type
}
]:
{
chunk_text
}
"
normal_text
=
f
"[NORMAL from
{
self
.
model_type
}
]:
{
chunk_text
}
"
return
reasoning
,
normal_text
class
TestSeparateReasoningExecution
(
CustomTestCase
):
def
setUp
(
self
):
"""Set up for the test."""
super
().
setUp
()
# Store any events created during the test
self
.
events
=
[]
def
tearDown
(
self
):
"""Clean up any threads that might have been created during the test."""
super
().
tearDown
()
# Set all events to ensure any waiting threads are released
for
event
in
self
.
events
:
event
.
set
()
def
tearDown
(
self
):
super
().
tearDown
()
# wake up all threads
for
ev
in
self
.
events
:
ev
.
set
()
@
patch
(
"sglang.srt.reasoning_parser.ReasoningParser"
)
def
test_execute_separate_reasoning
(
self
,
mock_parser_class
):
"""Test that _execute_separate_reasoning correctly calls the ReasoningParser."""
# Setup mock parser
mock_parser
=
MockReasoningParser
(
"deepseek-r1"
)
mock_parser_class
.
return_value
=
mock_parser
# Create a mock backend to avoid AttributeError in __del__
mock_backend
=
MagicMock
()
# Create a StreamExecutor with necessary setup
executor
=
StreamExecutor
(
backend
=
mock_backend
,
arguments
=
{},
default_sampling_para
=
{},
chat_template
=
{
"role_map"
:
{
"user"
:
"user"
,
"assistant"
:
"assistant"
}
},
# Simple chat template
stream
=
False
,
use_thread
=
False
,
)
# Set up the executor with a variable and its value
var_name
=
"test_var"
reasoning_name
=
f
"
{
var_name
}
_reasoning_content"
var_value
=
"Test content"
executor
.
variables
=
{
var_name
:
var_value
}
# Create events and track them for cleanup
var_event
=
create_daemon_event
()
reasoning_event
=
create_daemon_event
()
self
.
events
.
extend
([
var_event
,
reasoning_event
])
executor
.
variable_event
=
{
var_name
:
var_event
,
reasoning_name
:
reasoning_event
}
executor
.
variable_event
[
var_name
].
set
()
# Mark as ready
# Set up the current role
executor
.
cur_role
=
"assistant"
executor
.
cur_role_begin_pos
=
0
executor
.
text_
=
var_value
# Create a gen expression and a separate_reasoning expression
gen_expr
=
SglGen
(
var_name
)
expr
=
SglSeparateReasoning
(
"deepseek-r1"
,
expr
=
gen_expr
)
# Execute separate_reasoning
executor
.
_execute_separate_reasoning
(
expr
)
# Verify that the parser was created with the correct model type
mock_parser_class
.
assert_called_once_with
(
"deepseek-r1"
)
# Verify that parse_non_stream was called
self
.
assertTrue
(
mock_parser
.
parse_non_stream_called
)
# Verify that the variables were updated correctly
reasoning_name
=
f
"
{
var_name
}
_reasoning_content"
self
.
assertIn
(
reasoning_name
,
executor
.
variables
)
self
.
assertEqual
(
executor
.
variables
[
reasoning_name
],
f
"[REASONING from deepseek-r1]:
{
var_value
}
"
,
)
self
.
assertEqual
(
executor
.
variables
[
var_name
],
f
"[NORMAL from deepseek-r1]:
{
var_value
}
"
)
# Verify that the variable event was set
self
.
assertIn
(
reasoning_name
,
executor
.
variable_event
)
self
.
assertTrue
(
executor
.
variable_event
[
reasoning_name
].
is_set
())
# Verify that the text was updated
self
.
assertEqual
(
executor
.
text_
,
f
"[NORMAL from deepseek-r1]:
{
var_value
}
"
)
@
patch
(
"sglang.srt.reasoning_parser.ReasoningParser"
)
def
test_reasoning_parser_integration
(
self
,
mock_parser_class
):
"""Test the integration between separate_reasoning and ReasoningParser."""
# Setup mock parsers for different model types
deepseek_parser
=
MockReasoningParser
(
"deepseek-r1"
)
qwen_parser
=
MockReasoningParser
(
"qwen3"
)
# Configure the mock to return different parsers based on model type
def
get_parser
(
model_type
):
if
model_type
==
"deepseek-r1"
:
return
deepseek_parser
elif
model_type
==
"qwen3"
:
return
qwen_parser
else
:
raise
ValueError
(
f
"Unsupported model type:
{
model_type
}
"
)
mock_parser_class
.
side_effect
=
get_parser
# Test with DeepSeek-R1 model
test_text
=
"This is a test"
reasoning
,
normal_text
=
deepseek_parser
.
parse_non_stream
(
test_text
)
self
.
assertEqual
(
reasoning
,
f
"[REASONING from deepseek-r1]:
{
test_text
}
"
)
self
.
assertEqual
(
normal_text
,
f
"[NORMAL from deepseek-r1]:
{
test_text
}
"
)
# Test with Qwen3 model
reasoning
,
normal_text
=
qwen_parser
.
parse_non_stream
(
test_text
)
self
.
assertEqual
(
reasoning
,
f
"[REASONING from qwen3]:
{
test_text
}
"
)
self
.
assertEqual
(
normal_text
,
f
"[NORMAL from qwen3]:
{
test_text
}
"
)
@
patch
(
"sglang.srt.reasoning_parser.ReasoningParser"
)
def
test_reasoning_parser_invalid_model
(
self
,
mock_parser_class
):
"""Test that ReasoningParser raises an error for invalid model types."""
# Configure the mock to raise an error for invalid model types
def
get_parser
(
model_type
):
if
model_type
in
[
"deepseek-r1"
,
"qwen3"
]:
return
MockReasoningParser
(
model_type
)
elif
model_type
is
None
:
raise
ValueError
(
"Model type must be specified"
)
else
:
raise
ValueError
(
f
"Unsupported model type:
{
model_type
}
"
)
mock_parser_class
.
side_effect
=
get_parser
with
self
.
assertRaises
(
ValueError
)
as
context
:
mock_parser_class
(
"invalid-model"
)
self
.
assertIn
(
"Unsupported model type"
,
str
(
context
.
exception
))
with
self
.
assertRaises
(
ValueError
)
as
context
:
mock_parser_class
(
None
)
self
.
assertIn
(
"Model type must be specified"
,
str
(
context
.
exception
))
if
__name__
==
"__main__"
:
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment