Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f246ee95
Commit
f246ee95
authored
Jun 23, 2025
by
zhuwenwen
Browse files
update bench run error
parent
10184690
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
7 deletions
+13
-7
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+4
-4
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+6
-0
vllm/perf/benchmark_throughput.py
vllm/perf/benchmark_throughput.py
+3
-3
No files found.
benchmarks/benchmark_throughput.py
View file @
f246ee95
...
...
@@ -103,9 +103,11 @@ def run_vllm(
"prompt_token_ids"
:
batch
}
for
batch
in
dummy_prompt_token_ids
.
tolist
()]
use_beam_search
=
False
print
(
"Warming up..."
)
for
_
in
tqdm
(
range
(
num_iters_warmup
),
desc
=
"Warmup iterations"
):
if
not
args
.
use_beam_search
:
if
not
use_beam_search
:
llm
.
generate
(
dummy_prompts
,
sampling_params
=
sampling_params
,
use_tqdm
=
False
)
else
:
llm
.
beam_search
(
...
...
@@ -117,8 +119,6 @@ def run_vllm(
),
)
use_beam_search
=
False
outputs
=
None
if
not
use_beam_search
:
if
args
.
profile
:
...
...
@@ -790,4 +790,4 @@ if __name__ == "__main__":
if
args
.
tokenizer
is
None
:
args
.
tokenizer
=
args
.
model
validate_args
(
args
)
main
(
args
)
main
(
args
)
\ No newline at end of file
vllm/engine/arg_utils.py
View file @
f246ee95
...
...
@@ -788,6 +788,12 @@ class EngineArgs:
default
=
None
,
help
=
"The configurations for speculative decoding. Should be a "
"JSON string."
)
parser
.
add_argument
(
'--num-speculative-heads'
,
type
=
int
,
default
=
EngineArgs
.
num_speculative_heads
,
help
=
'The number of speculative heads to sample from '
'the draft model in speculative decoding.'
)
# Observability arguments
observability_kwargs
=
get_kwargs
(
ObservabilityConfig
)
...
...
vllm/perf/benchmark_throughput.py
View file @
f246ee95
...
...
@@ -103,9 +103,11 @@ def run_vllm(
"prompt_token_ids"
:
batch
}
for
batch
in
dummy_prompt_token_ids
.
tolist
()]
use_beam_search
=
False
print
(
"Warming up..."
)
for
_
in
tqdm
(
range
(
num_iters_warmup
),
desc
=
"Warmup iterations"
):
if
not
args
.
use_beam_search
:
if
not
use_beam_search
:
llm
.
generate
(
dummy_prompts
,
sampling_params
=
sampling_params
,
use_tqdm
=
False
)
else
:
llm
.
beam_search
(
...
...
@@ -117,8 +119,6 @@ def run_vllm(
),
)
use_beam_search
=
False
outputs
=
None
if
not
use_beam_search
:
if
args
.
profile
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment