Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c9db7911
"docs/vscode:/vscode.git/clone" did not exist on "ab50909d958ebd5280fb80cbda3ccb229e890a11"
Unverified
Commit
c9db7911
authored
Nov 03, 2025
by
fzyzcjy
Committed by
GitHub
Nov 02, 2025
Browse files
Super tiny fix naming in bench serving scripts (#12515)
parent
15ed27d7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
19 deletions
+19
-19
python/sglang/bench_one_batch.py
python/sglang/bench_one_batch.py
+19
-19
No files found.
python/sglang/bench_one_batch.py
View file @
c9db7911
...
...
@@ -15,7 +15,7 @@ python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruc
export SGLANG_TORCH_PROFILER_DIR=/root/sglang/profile_log
python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile
## run with CUDA profiler (nsys):
-nsys profile --force-overwrite=true -o bench_one_batch python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile --profiler_activities CUDA_PROFILER
+nsys profile --force-overwrite=true -o bench_one_batch python -m sglang.bench_one_batch --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 1 --input-len 256 --profile --profile-activities CUDA_PROFILER
# Usage (correctness test):
python -m sglang.bench_one_batch --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
...
...
@@ -98,12 +98,12 @@ profile_activities = [torch.profiler.ProfilerActivity.CPU] + [
]
-def start_profile(profiler_activities, profile_record_shapes=False, rank_print=print):
+def start_profile(profile_activities, profile_record_shapes=False, rank_print=print):
     """
-    Abstracted function to start profiling based on profiler_activities.
+    Abstracted function to start profiling based on profile_activities.
     Returns profiler object (or None).
     """
-    if "CUDA_PROFILER" in profiler_activities:
+    if "CUDA_PROFILER" in profile_activities:
         try:
             torch.cuda.cudart().cudaProfilerStart()
             rank_print("CUDA Profiler started (nsys will begin capturing)")
...
...
@@ -112,9 +112,9 @@ def start_profile(profiler_activities, profile_record_shapes=False, rank_print=p
             return None
     else:
         activities = []
-        if "CPU" in profiler_activities:
+        if "CPU" in profile_activities:
             activities.append(torch.profiler.ProfilerActivity.CPU)
-        if "GPU" in profiler_activities:
+        if "GPU" in profile_activities:
             activities.append(torch.profiler.ProfilerActivity.CUDA)
         if activities:
             profiler = torch.profiler.profile(
...
...
@@ -129,17 +129,17 @@ def start_profile(profiler_activities, profile_record_shapes=False, rank_print=p
 def stop_profile(
     profiler,
-    profiler_activities,
+    profile_activities,
     rank_print=print,
     save_trace=False,
     trace_filename=None,
     stage=None,
 ):
     """
-    Abstracted function to stop profiling based on profiler_activities.
+    Abstracted function to stop profiling based on profile_activities.
     Optionally saves trace results and prints completion messages.
     """
-    if "CUDA_PROFILER" in profiler_activities:
+    if "CUDA_PROFILER" in profile_activities:
         try:
             torch.cuda.cudart().cudaProfilerStop()
             rank_print("CUDA Profiler stopped (nsys should dump traces)")
...
...
@@ -156,7 +156,7 @@ def stop_profile(
         rank_print(f"torch profiler chrome trace {stage_desc} saved to {trace_filename}")
-    if "CUDA_PROFILER" in profiler_activities:
+    if "CUDA_PROFILER" in profile_activities:
         rank_print(f"CUDA profiler trace for {stage} completed")
...
...
@@ -174,7 +174,7 @@ class BenchArgs:
     log_decode_step: int = 0
     profile: bool = False
     profile_record_shapes: bool = False
-    profiler_activities: Tuple[str] = ("CPU", "GPU")
+    profile_activities: Tuple[str] = ("CPU", "GPU")
     profile_stage: str = "all"
     profile_filename_prefix: str = "profile"
...
...
@@ -211,7 +211,7 @@ class BenchArgs:
             help="Record tensor shapes in profiling results.",
         )
         parser.add_argument(
-            "--profiler_activities",
+            "--profile-activities",
             type=str,
             nargs="+",
             default=["CPU", "GPU"],
...
...
@@ -507,7 +507,7 @@ def latency_test_run_once(
     log_decode_step,
     profile,
     profile_record_shapes,
-    profiler_activities,
+    profile_activities,
     profile_filename_prefix,
     profile_stage,
     tp_rank,
...
...
@@ -535,7 +535,7 @@ def latency_test_run_once(
     enable_profile_prefill = profile and profile_stage in ["all", "prefill"]
     if enable_profile_prefill:
         profiler = start_profile(
-            profiler_activities,
+            profile_activities,
             profile_record_shapes=profile_record_shapes,
             rank_print=rank_print,
         )
...
...
@@ -552,7 +552,7 @@ def latency_test_run_once(
         )
         stop_profile(
             profiler,
-            profiler_activities,
+            profile_activities,
             rank_print=rank_print,
             save_trace=True,
             trace_filename=trace_filename,
...
...
@@ -575,7 +575,7 @@ def latency_test_run_once(
         profiler = None
         if enable_profile_decode and i == profile_step_of_interest:
             profiler = start_profile(
-                profiler_activities,
+                profile_activities,
                 profile_record_shapes=profile_record_shapes,
                 rank_print=rank_print,
             )
...
...
@@ -591,7 +591,7 @@ def latency_test_run_once(
         )
         stop_profile(
             profiler,
-            profiler_activities,
+            profile_activities,
             rank_print=rank_print,
             save_trace=True,
             trace_filename=trace_filename,
...
...
@@ -666,7 +666,7 @@ def latency_test(
         log_decode_step=0,
         profile=False,
         profile_record_shapes=False,
-        profiler_activities=("CPU", "GPU"),
+        profile_activities=("CPU", "GPU"),
         profile_filename_prefix="",
         profile_stage="all",
         tp_rank=tp_rank,
...
...
@@ -716,7 +716,7 @@ def latency_test(
         bench_args.log_decode_step,
         bench_args.profile if tp_rank == 0 else None,
         bench_args.profile_record_shapes if tp_rank == 0 else None,
-        bench_args.profiler_activities,
+        bench_args.profile_activities,
         bench_args.profile_filename_prefix,
         bench_args.profile_stage,
         tp_rank,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment