Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f5d72b2f
Unverified
Commit
f5d72b2f
authored
Oct 03, 2024
by
sroy745
Committed by
GitHub
Oct 03, 2024
Browse files
[Core] Make BlockSpaceManagerV2 the default BlockManager to use. (#8678)
parent
83caf35e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
5 deletions
+8
-5
vllm/config.py
vllm/config.py
+1
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+7
-4
No files found.
vllm/config.py
View file @
f5d72b2f
...
@@ -970,7 +970,7 @@ class SchedulerConfig:
...
@@ -970,7 +970,7 @@ class SchedulerConfig:
max_num_batched_tokens
:
Optional
[
int
],
max_num_batched_tokens
:
Optional
[
int
],
max_num_seqs
:
int
,
max_num_seqs
:
int
,
max_model_len
:
int
,
max_model_len
:
int
,
use_v2_block_manager
:
bool
=
False
,
use_v2_block_manager
:
bool
=
True
,
num_lookahead_slots
:
int
=
0
,
num_lookahead_slots
:
int
=
0
,
delay_factor
:
float
=
0.0
,
delay_factor
:
float
=
0.0
,
enable_chunked_prefill
:
bool
=
False
,
enable_chunked_prefill
:
bool
=
False
,
...
...
vllm/engine/arg_utils.py
View file @
f5d72b2f
...
@@ -107,7 +107,7 @@ class EngineArgs:
...
@@ -107,7 +107,7 @@ class EngineArgs:
block_size
:
int
=
16
block_size
:
int
=
16
enable_prefix_caching
:
bool
=
False
enable_prefix_caching
:
bool
=
False
disable_sliding_window
:
bool
=
False
disable_sliding_window
:
bool
=
False
use_v2_block_manager
:
bool
=
False
use_v2_block_manager
:
bool
=
True
swap_space
:
float
=
4
# GiB
swap_space
:
float
=
4
# GiB
cpu_offload_gb
:
float
=
0
# GiB
cpu_offload_gb
:
float
=
0
# GiB
gpu_memory_utilization
:
float
=
0.90
gpu_memory_utilization
:
float
=
0.90
...
@@ -369,9 +369,12 @@ class EngineArgs:
...
@@ -369,9 +369,12 @@ class EngineArgs:
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Disables sliding window, '
help
=
'Disables sliding window, '
'capping to sliding window size'
)
'capping to sliding window size'
)
parser
.
add_argument
(
'--use-v2-block-manager'
,
parser
.
add_argument
(
'--use-v2-block-manager'
,
default
=
EngineArgs
.
use_v2_block_manager
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Use BlockSpaceMangerV2.'
)
help
=
'Use BlockSpaceMangerV2. By default this is set to True. '
'Set to False to use BlockSpaceManagerV1'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--num-lookahead-slots'
,
'--num-lookahead-slots'
,
type
=
int
,
type
=
int
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment