Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d9b4b3f0
Unverified
Commit
d9b4b3f0
authored
Nov 27, 2024
by
Ricky Xu
Committed by
GitHub
Nov 27, 2024
Browse files
[Bug][CLI] Allow users to disable prefix caching explicitly (#10724)
Signed-off-by:
rickyx
<
rickyx@anyscale.com
>
parent
278be671
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
45 additions
and
3 deletions
+45
-3
tests/engine/test_arg_utils.py
tests/engine/test_arg_utils.py
+19
-0
tests/v1/engine/test_engine_args.py
tests/v1/engine/test_engine_args.py
+19
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+7
-3
No files found.
tests/engine/test_arg_utils.py
View file @
d9b4b3f0
...
@@ -59,6 +59,25 @@ def test_compilation_config():
...
@@ -59,6 +59,25 @@ def test_compilation_config():
assert
args
.
compilation_config
.
level
==
3
assert
args
.
compilation_config
.
level
==
3
def test_prefix_cache_default():
    """Exercise the prefix-caching CLI switch in its three states.

    Builds one parser via ``EngineArgs.add_cli_args`` and checks the
    resulting ``enable_prefix_caching`` value when the flag is absent,
    when ``--enable-prefix-caching`` is passed, and when
    ``--no-enable-prefix-caching`` is passed.
    """
    cli_parser = EngineArgs.add_cli_args(FlexibleArgumentParser())

    def engine_args_for(argv):
        # Parse the given argv and convert the namespace to EngineArgs.
        namespace = cli_parser.parse_args(argv)
        return EngineArgs.from_cli_args(args=namespace)

    # No flag given: the option must resolve to a falsy value.
    without_flag = engine_args_for([])
    assert (not without_flag.enable_prefix_caching
            ), "prefix caching defaults to off."

    # Positive form of the flag switches it on.
    switched_on = engine_args_for(["--enable-prefix-caching"])
    assert switched_on.enable_prefix_caching

    # Negated form of the flag switches it off explicitly.
    switched_off = engine_args_for(["--no-enable-prefix-caching"])
    assert not switched_off.enable_prefix_caching
def
test_valid_pooling_config
():
def
test_valid_pooling_config
():
parser
=
EngineArgs
.
add_cli_args
(
FlexibleArgumentParser
())
parser
=
EngineArgs
.
add_cli_args
(
FlexibleArgumentParser
())
args
=
parser
.
parse_args
([
args
=
parser
.
parse_args
([
...
...
tests/v1/engine/test_engine_args.py
View file @
d9b4b3f0
...
@@ -4,6 +4,7 @@ from vllm import envs
...
@@ -4,6 +4,7 @@ from vllm import envs
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
FlexibleArgumentParser
if
not
envs
.
VLLM_USE_V1
:
if
not
envs
.
VLLM_USE_V1
:
pytest
.
skip
(
pytest
.
skip
(
...
@@ -12,6 +13,24 @@ if not envs.VLLM_USE_V1:
...
@@ -12,6 +13,24 @@ if not envs.VLLM_USE_V1:
)
)
def test_prefix_caching_from_cli():
    """Check ``enable_prefix_caching`` as derived from CLI arguments.

    Covers three invocations against a single parser: no flag (expected
    to be on, per the assertion message for V1),
    ``--no-enable-prefix-caching`` (expected off) and
    ``--enable-prefix-caching`` (expected on).
    """
    cli_parser = EngineArgs.add_cli_args(FlexibleArgumentParser())

    def engine_args_for(argv):
        # Parse the given argv and convert the namespace to EngineArgs.
        namespace = cli_parser.parse_args(argv)
        return EngineArgs.from_cli_args(args=namespace)

    # No flag given: the option must resolve to a truthy value.
    without_flag = engine_args_for([])
    assert (without_flag.enable_prefix_caching
            ), "V1 turns on prefix caching by default."

    # It can be turned off with the negated flag.
    switched_off = engine_args_for(["--no-enable-prefix-caching"])
    assert not switched_off.enable_prefix_caching

    # It can be turned on with the positive flag.
    switched_on = engine_args_for(["--enable-prefix-caching"])
    assert switched_on.enable_prefix_caching
def
test_defaults
():
def
test_defaults
():
engine_args
=
EngineArgs
(
model
=
"facebook/opt-125m"
)
engine_args
=
EngineArgs
(
model
=
"facebook/opt-125m"
)
...
...
vllm/engine/arg_utils.py
View file @
d9b4b3f0
...
@@ -416,9 +416,13 @@ class EngineArgs:
...
@@ -416,9 +416,13 @@ class EngineArgs:
'tokens. This is ignored on neuron devices and '
'tokens. This is ignored on neuron devices and '
'set to max-model-len'
)
'set to max-model-len'
)
parser
.
add_argument
(
'--enable-prefix-caching'
,
parser
.
add_argument
(
action
=
'store_true'
,
"--enable-prefix-caching"
,
help
=
'Enables automatic prefix caching.'
)
action
=
argparse
.
BooleanOptionalAction
,
default
=
EngineArgs
.
enable_prefix_caching
,
help
=
"Enables automatic prefix caching. "
"Use --no-enable-prefix-caching to disable explicitly."
,
)
parser
.
add_argument
(
'--disable-sliding-window'
,
parser
.
add_argument
(
'--disable-sliding-window'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Disables sliding window, '
help
=
'Disables sliding window, '
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment