Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
776dcec8
Unverified
Commit
776dcec8
authored
Mar 14, 2025
by
Russell Bryant
Committed by
GitHub
Mar 15, 2025
Browse files
Disable outlines cache by default (#14837)
parent
ccf02fcb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
1 deletion
+16
-1
vllm/envs.py
vllm/envs.py
+7
-0
vllm/model_executor/guided_decoding/outlines_logits_processors.py
...el_executor/guided_decoding/outlines_logits_processors.py
+9
-1
No files found.
vllm/envs.py
View file @
776dcec8
...
...
@@ -95,6 +95,7 @@ if TYPE_CHECKING:
VLLM_DP_MASTER_IP
:
str
=
""
VLLM_DP_MASTER_PORT
:
int
=
0
VLLM_MARLIN_USE_ATOMIC_ADD
:
bool
=
False
VLLM_V0_USE_OUTLINES_CACHE
:
bool
=
False
def
get_default_cache_root
():
...
...
@@ -623,6 +624,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Whether to use atomicAdd reduce in gptq/awq marlin kernel.
"VLLM_MARLIN_USE_ATOMIC_ADD"
:
lambda
:
os
.
environ
.
get
(
"VLLM_MARLIN_USE_ATOMIC_ADD"
,
"0"
)
==
"1"
,
# Whether to turn on the outlines cache for V0
# This cache is unbounded and on disk, so it's not safe to use in
# an environment with potentially malicious users.
"VLLM_V0_USE_OUTLINES_CACHE"
:
lambda
:
os
.
environ
.
get
(
"VLLM_V0_USE_OUTLINES_CACHE"
,
"0"
)
==
"1"
,
}
# end-env-vars-definition
...
...
vllm/model_executor/guided_decoding/outlines_logits_processors.py
View file @
776dcec8
...
...
@@ -24,7 +24,7 @@ from typing import Callable, DefaultDict, Dict, List, Optional, Union
import
numpy
as
np
import
torch
from
outlines
import
grammars
from
outlines.caching
import
cache
from
outlines.caching
import
cache
,
disable_cache
from
outlines.fsm.guide
import
(
CFGGuide
,
CFGState
,
Generate
,
Guide
,
RegexGuide
,
Write
)
from
outlines.fsm.parsing
import
PartialLark
...
...
@@ -32,12 +32,20 @@ from outlines_core.fsm.json_schema import build_regex_from_schema
from
pydantic
import
BaseModel
from
transformers
import
PreTrainedTokenizerBase
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.model_executor.guided_decoding.reasoner
import
Reasoner
from
vllm.platforms
import
current_platform
logger
=
init_logger
(
__name__
)
if
envs
.
VLLM_V0_USE_OUTLINES_CACHE
:
logger
.
warning
(
"Enabling outlines cache. This is an unbounded on-disk "
"cache. It may consume a lot of disk space and should "
"not be used with untrusted clients."
)
else
:
disable_cache
()
class
BaseLogitsProcessor
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment