Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
dd445a41
Unverified
Commit
dd445a41
authored
Jul 09, 2025
by
kyleliang-nv
Committed by
GitHub
Jul 09, 2025
Browse files
[feature] Add start step profile argument in /start_profile (#7608)
parent
7590f522
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
142 additions
and
1 deletion
+142
-1
python/sglang/srt/entrypoints/http_server.py
python/sglang/srt/entrypoints/http_server.py
+1
-0
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+2
-0
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+21
-1
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+2
-0
test/srt/run_suite.py
test/srt/run_suite.py
+1
-0
test/srt/test_start_profile.py
test/srt/test_start_profile.py
+115
-0
No files found.
python/sglang/srt/entrypoints/http_server.py
View file @
dd445a41
...
...
@@ -418,6 +418,7 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
await
_global_state
.
tokenizer_manager
.
start_profile
(
output_dir
=
obj
.
output_dir
,
start_step
=
obj
.
start_step
,
num_steps
=
obj
.
num_steps
,
activities
=
obj
.
activities
,
with_stack
=
obj
.
with_stack
,
...
...
python/sglang/srt/managers/io_struct.py
View file @
dd445a41
...
...
@@ -905,6 +905,7 @@ class ProfileReqInput:
# If num_steps is set, profiling runs for that many steps.
# Profiling then stops automatically after the last of those steps, and
# the caller doesn't need to run stop_profile.
start_step
:
Optional
[
int
]
=
None
num_steps
:
Optional
[
int
]
=
None
activities
:
Optional
[
List
[
str
]]
=
None
profile_by_stage
:
bool
=
False
...
...
@@ -932,6 +933,7 @@ class ExpertDistributionReqOutput:
class
ProfileReq
:
type
:
ProfileReqType
output_dir
:
Optional
[
str
]
=
None
start_step
:
Optional
[
int
]
=
None
num_steps
:
Optional
[
int
]
=
None
activities
:
Optional
[
List
[
str
]]
=
None
profile_by_stage
:
bool
=
False
...
...
python/sglang/srt/managers/scheduler.py
View file @
dd445a41
...
...
@@ -485,6 +485,8 @@ class Scheduler(
enable
=
server_args
.
enable_memory_saver
)
self
.
init_profier
()
# Init metrics stats
self
.
init_metrics
()
self
.
init_kv_events
(
server_args
.
kv_events_config
)
...
...
@@ -628,6 +630,7 @@ class Scheduler(
self
.
torch_profiler_output_dir
:
Optional
[
str
]
=
None
self
.
profiler_activities
:
Optional
[
List
[
str
]]
=
None
self
.
profile_id
:
Optional
[
str
]
=
None
self
.
profiler_start_forward_ct
:
Optional
[
int
]
=
None
self
.
profiler_target_forward_ct
:
Optional
[
int
]
=
None
self
.
profiler_target_prefill_ct
:
Optional
[
int
]
=
None
self
.
profiler_target_decode_ct
:
Optional
[
int
]
=
None
...
...
@@ -2389,9 +2392,10 @@ class Scheduler(
def
profile
(
self
,
recv_req
:
ProfileReq
):
if
recv_req
.
type
==
ProfileReqType
.
START_PROFILE
:
if
recv_req
.
profile_by_stage
:
if
recv_req
.
profile_by_stage
or
recv_req
.
start_step
:
return
self
.
init_profile
(
recv_req
.
output_dir
,
recv_req
.
start_step
,
recv_req
.
num_steps
,
recv_req
.
activities
,
recv_req
.
with_stack
,
...
...
@@ -2402,6 +2406,7 @@ class Scheduler(
else
:
self
.
init_profile
(
recv_req
.
output_dir
,
recv_req
.
start_step
,
recv_req
.
num_steps
,
recv_req
.
activities
,
recv_req
.
with_stack
,
...
...
@@ -2416,6 +2421,7 @@ class Scheduler(
def
init_profile
(
self
,
output_dir
:
Optional
[
str
],
start_step
:
Optional
[
int
],
num_steps
:
Optional
[
int
],
activities
:
Optional
[
List
[
str
]],
with_stack
:
Optional
[
bool
],
...
...
@@ -2442,6 +2448,9 @@ class Scheduler(
self
.
profiler_activities
=
activities
self
.
profile_id
=
profile_id
if
start_step
:
self
.
profiler_start_forward_ct
=
max
(
start_step
,
self
.
forward_ct
+
1
)
if
num_steps
:
self
.
profile_steps
=
num_steps
if
self
.
profile_by_stage
:
...
...
@@ -2449,6 +2458,10 @@ class Scheduler(
self
.
profiler_target_decode_ct
=
num_steps
self
.
profiler_prefill_ct
=
0
self
.
profiler_decode_ct
=
0
elif
start_step
:
self
.
profiler_target_forward_ct
=
(
self
.
profiler_start_forward_ct
+
num_steps
)
else
:
self
.
profiler_target_forward_ct
=
self
.
forward_ct
+
num_steps
# The caller will be notified when reaching profiler_target_forward_ct
...
...
@@ -2521,6 +2534,7 @@ class Scheduler(
if
"CUDA_PROFILER"
in
activities
:
torch
.
cuda
.
cudart
().
cudaProfilerStart
()
self
.
profile_in_progress
=
True
return
ProfileReqOutput
(
success
=
True
,
message
=
"Succeeded"
)
...
...
@@ -2584,6 +2598,7 @@ class Scheduler(
)
self
.
torch_profiler
=
None
self
.
profile_in_progress
=
False
self
.
profiler_start_forward_ct
=
None
return
ProfileReqOutput
(
success
=
True
,
message
=
"Succeeded."
)
...
...
@@ -2617,6 +2632,11 @@ class Scheduler(
and
self
.
profiler_target_forward_ct
<=
self
.
forward_ct
):
self
.
stop_profile
()
if
(
self
.
profiler_start_forward_ct
and
self
.
profiler_start_forward_ct
==
self
.
forward_ct
):
self
.
start_profile
()
def
expert_distribution_handle
(
self
,
recv_req
:
ExpertDistributionReq
):
if
recv_req
==
ExpertDistributionReq
.
START_RECORD
:
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
dd445a41
...
...
@@ -863,6 +863,7 @@ class TokenizerManager:
async
def
start_profile
(
self
,
output_dir
:
Optional
[
str
]
=
None
,
start_step
:
Optional
[
int
]
=
None
,
num_steps
:
Optional
[
int
]
=
None
,
activities
:
Optional
[
List
[
str
]]
=
None
,
with_stack
:
Optional
[
bool
]
=
None
,
...
...
@@ -875,6 +876,7 @@ class TokenizerManager:
req
=
ProfileReq
(
type
=
ProfileReqType
.
START_PROFILE
,
output_dir
=
output_dir
,
start_step
=
start_step
,
num_steps
=
num_steps
,
activities
=
activities
,
with_stack
=
with_stack
,
...
...
test/srt/run_suite.py
View file @
dd445a41
...
...
@@ -87,6 +87,7 @@ suites = {
TestFile
(
"test_skip_tokenizer_init.py"
,
117
),
TestFile
(
"test_srt_engine.py"
,
261
),
TestFile
(
"test_srt_endpoint.py"
,
130
),
TestFile
(
"test_start_profile.py"
,
60
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
172
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
...
...
test/srt/test_start_profile.py
0 → 100644
View file @
dd445a41
"""
Usage:
python3 test_start_profile.py
"""
import
os
import
shutil
import
unittest
import
requests
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
)
# Directory the tests clear before each case and then inspect for profiler
# output; it is exported as SGLANG_TORCH_PROFILER_DIR when this file is run
# as a script.
OUTPUT_DIR = "./profiler_dir"
class TestStartProfile(CustomTestCase):
    """End-to-end checks of the /start_profile HTTP endpoint.

    One server is launched for the whole class. Each test clears OUTPUT_DIR,
    starts profiling with a different argument combination, sends a single
    /generate request, and then inspects OUTPUT_DIR.
    """

    @classmethod
    def setUpClass(cls):
        """Launch the server used by every test in this class."""
        # Export the profiler directory here as well as under ``__main__`` so
        # the tests also work when started via ``python3 -m unittest``.
        # NOTE(review): presumably the launched server inherits this process's
        # environment -- confirm against popen_launch_server.
        os.environ["SGLANG_TORCH_PROFILER_DIR"] = OUTPUT_DIR
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        )

    @classmethod
    def tearDownClass(cls):
        """Terminate the server process started in setUpClass."""
        kill_process_tree(cls.process.pid)

    def setUp(self):
        """Start every test from a clean profiler output directory."""
        self._clear_profile_dir()

    def test_start_profile_1(self):
        """Test /start_profile with start_step and num_steps arguments.

        This has to be the first test for start_step to work.
        """
        # start_step is declared as Optional[int]; send an integer rather
        # than relying on the server to coerce a string.
        self._start_profile(start_step=15, num_steps=5)

        self._post_request()

        self._check_non_empty_profile_dir()

    def test_start_profile_2(self):
        """Test /start_profile with no argument."""
        self._start_profile()

        self._post_request()

        # Before /stop_profile, the profile directory should be empty
        self._check_empty_profile_dir()

        # Post /stop_profile and check the profile directory is non-empty
        response = requests.post(f"{self.base_url}/stop_profile")
        self.assertEqual(response.status_code, 200)
        self._check_non_empty_profile_dir()

    def test_start_profile_3(self):
        """Test /start_profile with the num_steps argument."""
        self._start_profile(num_steps=5)

        self._post_request()

        self._check_non_empty_profile_dir()

    def _start_profile(self, **kwargs):
        """Start profiling with optional parameters.

        Keyword arguments are sent as the JSON body of /start_profile; when
        none are given, no body is sent. Asserts an HTTP 200 reply and
        returns the response.
        """
        response = requests.post(
            f"{self.base_url}/start_profile",
            json=kwargs if kwargs else None,
        )
        self.assertEqual(response.status_code, 200)
        return response

    def _post_request(self):
        """Send one /generate request and assert an HTTP 200 reply."""
        response = requests.post(
            f"{self.base_url}/generate",
            json={
                "text": "The capital of France is",
                "sampling_params": {
                    "temperature": 0,
                    "max_new_tokens": 32,
                },
            },
        )
        self.assertEqual(response.status_code, 200)

    def _clear_profile_dir(self):
        """Delete OUTPUT_DIR and everything inside it, if it exists."""
        if os.path.isdir(OUTPUT_DIR):
            # Remove the directory and all its contents
            shutil.rmtree(OUTPUT_DIR)

    def _check_non_empty_profile_dir(self):
        """Assert that OUTPUT_DIR exists and contains at least one entry."""
        self.assertTrue(os.path.isdir(OUTPUT_DIR), "Output directory does not exist.")
        self.assertNotEqual(
            len(os.listdir(OUTPUT_DIR)), 0, "Output directory is empty!"
        )

    def _check_empty_profile_dir(self):
        """Assert that OUTPUT_DIR, if it exists, has no entries."""
        if os.path.isdir(OUTPUT_DIR):
            self.assertEqual(
                len(os.listdir(OUTPUT_DIR)), 0, "Output directory is non-empty!"
            )
if __name__ == "__main__":
    # Export the profiler output directory before unittest starts running
    # the tests, so it is already set when the test class launches its server.
    os.environ.update({"SGLANG_TORCH_PROFILER_DIR": OUTPUT_DIR})
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment