Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c9db7911
Unverified
Commit
c9db7911
authored
Nov 03, 2025
by
fzyzcjy
Committed by
GitHub
Nov 02, 2025
Browse files
Super tiny fix naming in bench serving scripts (#12515)
parent
15ed27d7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
19 deletions
+19
-19
python/sglang/bench_one_batch.py
python/sglang/bench_one_batch.py
+19
-19
No files found.
python/sglang/bench_one_batch.py
View file @
c9db7911
...
...
@@ -15,7 +15,7 @@ python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruc
export SGLANG_TORCH_PROFILER_DIR=/root/sglang/profile_log
python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile
## run with CUDA profiler (nsys):
nsys profile --force-overwrite=true -o bench_one_batch python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile --profiler_activities CUDA_PROFILER
nsys profile --force-overwrite=true -o bench_one_batch python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile --profile-activities CUDA_PROFILER
# Usage (correctness test):
python -m sglang.bench_one_batch --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
...
...
@@ -98,12 +98,12 @@ profile_activities = [torch.profiler.ProfilerActivity.CPU] + [
]
def
start_profile
(
profile
r
_activities
,
profile_record_shapes
=
False
,
rank_print
=
print
):
def
start_profile
(
profile_activities
,
profile_record_shapes
=
False
,
rank_print
=
print
):
"""
Abstracted function to start profiling based on profiler_activities.
Abstracted function to start profiling based on profile_activities.
Returns profiler object (or None).
"""
if
"CUDA_PROFILER"
in
profile
r
_activities
:
if
"CUDA_PROFILER"
in
profile_activities
:
try
:
torch
.
cuda
.
cudart
().
cudaProfilerStart
()
rank_print
(
"CUDA Profiler started (nsys will begin capturing)"
)
...
...
@@ -112,9 +112,9 @@ def start_profile(profiler_activities, profile_record_shapes=False, rank_print=p
return
None
else
:
activities
=
[]
if
"CPU"
in
profile
r
_activities
:
if
"CPU"
in
profile_activities
:
activities
.
append
(
torch
.
profiler
.
ProfilerActivity
.
CPU
)
if
"GPU"
in
profile
r
_activities
:
if
"GPU"
in
profile_activities
:
activities
.
append
(
torch
.
profiler
.
ProfilerActivity
.
CUDA
)
if
activities
:
profiler
=
torch
.
profiler
.
profile
(
...
...
@@ -129,17 +129,17 @@ def start_profile(profiler_activities, profile_record_shapes=False, rank_print=p
def
stop_profile
(
profiler
,
profile
r
_activities
,
profile_activities
,
rank_print
=
print
,
save_trace
=
False
,
trace_filename
=
None
,
stage
=
None
,
):
"""
Abstracted function to stop profiling based on profiler_activities.
Abstracted function to stop profiling based on profile_activities.
Optionally saves trace results and prints completion messages.
"""
if
"CUDA_PROFILER"
in
profile
r
_activities
:
if
"CUDA_PROFILER"
in
profile_activities
:
try
:
torch
.
cuda
.
cudart
().
cudaProfilerStop
()
rank_print
(
"CUDA Profiler stopped (nsys should dump traces)"
)
...
...
@@ -156,7 +156,7 @@ def stop_profile(
rank_print
(
f
"torch profiler chrome trace
{
stage_desc
}
saved to
{
trace_filename
}
"
)
if
"CUDA_PROFILER"
in
profile
r
_activities
:
if
"CUDA_PROFILER"
in
profile_activities
:
rank_print
(
f
"CUDA profiler trace for
{
stage
}
completed"
)
...
...
@@ -174,7 +174,7 @@ class BenchArgs:
log_decode_step
:
int
=
0
profile
:
bool
=
False
profile_record_shapes
:
bool
=
False
profile
r
_activities
:
Tuple
[
str
]
=
(
"CPU"
,
"GPU"
)
profile_activities
:
Tuple
[
str
]
=
(
"CPU"
,
"GPU"
)
profile_stage
:
str
=
"all"
profile_filename_prefix
:
str
=
"profile"
...
...
@@ -211,7 +211,7 @@ class BenchArgs:
help
=
"Record tensor shapes in profiling results."
,
)
parser
.
add_argument
(
"--profile
r_
activities"
,
"--profile
-
activities"
,
type
=
str
,
nargs
=
"+"
,
default
=
[
"CPU"
,
"GPU"
],
...
...
@@ -507,7 +507,7 @@ def latency_test_run_once(
log_decode_step
,
profile
,
profile_record_shapes
,
profile
r
_activities
,
profile_activities
,
profile_filename_prefix
,
profile_stage
,
tp_rank
,
...
...
@@ -535,7 +535,7 @@ def latency_test_run_once(
enable_profile_prefill
=
profile
and
profile_stage
in
[
"all"
,
"prefill"
]
if
enable_profile_prefill
:
profiler
=
start_profile
(
profile
r
_activities
,
profile_activities
,
profile_record_shapes
=
profile_record_shapes
,
rank_print
=
rank_print
,
)
...
...
@@ -552,7 +552,7 @@ def latency_test_run_once(
)
stop_profile
(
profiler
,
profile
r
_activities
,
profile_activities
,
rank_print
=
rank_print
,
save_trace
=
True
,
trace_filename
=
trace_filename
,
...
...
@@ -575,7 +575,7 @@ def latency_test_run_once(
profiler
=
None
if
enable_profile_decode
and
i
==
profile_step_of_interest
:
profiler
=
start_profile
(
profile
r
_activities
,
profile_activities
,
profile_record_shapes
=
profile_record_shapes
,
rank_print
=
rank_print
,
)
...
...
@@ -591,7 +591,7 @@ def latency_test_run_once(
)
stop_profile
(
profiler
,
profile
r
_activities
,
profile_activities
,
rank_print
=
rank_print
,
save_trace
=
True
,
trace_filename
=
trace_filename
,
...
...
@@ -666,7 +666,7 @@ def latency_test(
log_decode_step
=
0
,
profile
=
False
,
profile_record_shapes
=
False
,
profile
r
_activities
=
(
"CPU"
,
"GPU"
),
profile_activities
=
(
"CPU"
,
"GPU"
),
profile_filename_prefix
=
""
,
profile_stage
=
"all"
,
tp_rank
=
tp_rank
,
...
...
@@ -716,7 +716,7 @@ def latency_test(
bench_args
.
log_decode_step
,
bench_args
.
profile
if
tp_rank
==
0
else
None
,
bench_args
.
profile_record_shapes
if
tp_rank
==
0
else
None
,
bench_args
.
profile
r
_activities
,
bench_args
.
profile_activities
,
bench_args
.
profile_filename_prefix
,
bench_args
.
profile_stage
,
tp_rank
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment