Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
303d4479
Unverified
Commit
303d4479
authored
Oct 03, 2024
by
Michael Goin
Committed by
GitHub
Oct 03, 2024
Browse files
[Misc] Enable multi-step output streaming by default (#9047)
parent
aeb37c2a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
5 deletions
+9
-5
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+9
-5
No files found.
vllm/engine/arg_utils.py
View file @
303d4479
...
...
@@ -145,7 +145,7 @@ class EngineArgs:
max_cpu_loras
:
Optional
[
int
]
=
None
device
:
str
=
'auto'
num_scheduler_steps
:
int
=
1
multi_step_stream_outputs
:
bool
=
False
multi_step_stream_outputs
:
bool
=
True
ray_workers_use_nsight
:
bool
=
False
num_gpu_blocks_override
:
Optional
[
int
]
=
None
num_lookahead_slots
:
int
=
0
...
...
@@ -603,13 +603,17 @@ class EngineArgs:
parser
.
add_argument
(
'--multi-step-stream-outputs'
,
action
=
'store_true'
,
help
=
'If True, then multi-step will stream outputs for every step'
)
action
=
StoreBoolean
,
default
=
EngineArgs
.
multi_step_stream_outputs
,
nargs
=
"?"
,
const
=
"True"
,
help
=
'If False, then multi-step will stream outputs at the end '
'of all steps'
)
parser
.
add_argument
(
'--scheduler-delay-factor'
,
type
=
float
,
default
=
EngineArgs
.
scheduler_delay_factor
,
help
=
'Apply a delay (of delay factor multiplied by previous'
help
=
'Apply a delay (of delay factor multiplied by previous '
'prompt latency) before scheduling next prompt.'
)
parser
.
add_argument
(
'--enable-chunked-prefill'
,
...
...
@@ -632,7 +636,7 @@ class EngineArgs:
type
=
nullable_str
,
choices
=
[
*
QUANTIZATION_METHODS
,
None
],
default
=
EngineArgs
.
speculative_model_quantization
,
help
=
'Method used to quantize the weights of speculative model.'
help
=
'Method used to quantize the weights of speculative model. '
'If None, we first check the `quantization_config` '
'attribute in the model config file. If that is '
'None, we assume the model weights are not '
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment