Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
776dcec8
Unverified
Commit
776dcec8
authored
Mar 14, 2025
by
Russell Bryant
Committed by
GitHub
Mar 15, 2025
Browse files
Disable outlines cache by default (#14837)
parent
ccf02fcb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
1 deletion
+16
-1
vllm/envs.py
vllm/envs.py
+7
-0
vllm/model_executor/guided_decoding/outlines_logits_processors.py
...el_executor/guided_decoding/outlines_logits_processors.py
+9
-1
No files found.
vllm/envs.py
View file @
776dcec8
...
@@ -95,6 +95,7 @@ if TYPE_CHECKING:
...
@@ -95,6 +95,7 @@ if TYPE_CHECKING:
VLLM_DP_MASTER_IP
:
str
=
""
VLLM_DP_MASTER_IP
:
str
=
""
VLLM_DP_MASTER_PORT
:
int
=
0
VLLM_DP_MASTER_PORT
:
int
=
0
VLLM_MARLIN_USE_ATOMIC_ADD
:
bool
=
False
VLLM_MARLIN_USE_ATOMIC_ADD
:
bool
=
False
VLLM_V0_USE_OUTLINES_CACHE
:
bool
=
False
def
get_default_cache_root
():
def
get_default_cache_root
():
...
@@ -623,6 +624,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -623,6 +624,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Whether to use atomicAdd reduce in gptq/awq marlin kernel.
# Whether to use atomicAdd reduce in gptq/awq marlin kernel.
"VLLM_MARLIN_USE_ATOMIC_ADD"
:
"VLLM_MARLIN_USE_ATOMIC_ADD"
:
lambda
:
os
.
environ
.
get
(
"VLLM_MARLIN_USE_ATOMIC_ADD"
,
"0"
)
==
"1"
,
lambda
:
os
.
environ
.
get
(
"VLLM_MARLIN_USE_ATOMIC_ADD"
,
"0"
)
==
"1"
,
# Whether to turn on the outlines cache for V0 (set to "1" to enable; off by default).
# This cache is unbounded and stored on disk, so it is not safe to use in
# an environment with potentially malicious users.
"VLLM_V0_USE_OUTLINES_CACHE"
:
lambda
:
os
.
environ
.
get
(
"VLLM_V0_USE_OUTLINES_CACHE"
,
"0"
)
==
"1"
,
}
}
# end-env-vars-definition
# end-env-vars-definition
...
...
vllm/model_executor/guided_decoding/outlines_logits_processors.py
View file @
776dcec8
...
@@ -24,7 +24,7 @@ from typing import Callable, DefaultDict, Dict, List, Optional, Union
...
@@ -24,7 +24,7 @@ from typing import Callable, DefaultDict, Dict, List, Optional, Union
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
outlines
import
grammars
from
outlines
import
grammars
from
outlines.caching
import
cache
from
outlines.caching
import
cache
,
disable_cache
from
outlines.fsm.guide
import
(
CFGGuide
,
CFGState
,
Generate
,
Guide
,
from
outlines.fsm.guide
import
(
CFGGuide
,
CFGState
,
Generate
,
Guide
,
RegexGuide
,
Write
)
RegexGuide
,
Write
)
from
outlines.fsm.parsing
import
PartialLark
from
outlines.fsm.parsing
import
PartialLark
...
@@ -32,12 +32,20 @@ from outlines_core.fsm.json_schema import build_regex_from_schema
...
@@ -32,12 +32,20 @@ from outlines_core.fsm.json_schema import build_regex_from_schema
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
transformers
import
PreTrainedTokenizerBase
from
transformers
import
PreTrainedTokenizerBase
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.guided_decoding.reasoner
import
Reasoner
from
vllm.model_executor.guided_decoding.reasoner
import
Reasoner
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
if
envs
.
VLLM_V0_USE_OUTLINES_CACHE
:
logger
.
warning
(
"Enabling outlines cache. This is an unbounded on-disk "
"cache. It may consume a lot of disk space and should "
"not be used with untrusted clients."
)
else
:
disable_cache
()
class
BaseLogitsProcessor
:
class
BaseLogitsProcessor
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment