Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
697ef765
Unverified
Commit
697ef765
authored
Jul 14, 2025
by
Aaron Pham
Committed by
GitHub
Jul 14, 2025
Browse files
[Refactor][V1] Move outlines utils for V1 imports (#20878)
Signed-off-by:
Aaron Pham
<
contact@aarnphm.xyz
>
parent
a99b9f7d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
204 additions
and
5 deletions
+204
-5
vllm/v1/structured_output/backend_outlines.py
vllm/v1/structured_output/backend_outlines.py
+5
-4
vllm/v1/structured_output/utils.py
vllm/v1/structured_output/utils.py
+199
-1
No files found.
vllm/v1/structured_output/backend_outlines.py
View file @
697ef765
...
@@ -13,13 +13,14 @@ from typing import TYPE_CHECKING
...
@@ -13,13 +13,14 @@ from typing import TYPE_CHECKING
import
torch
import
torch
from
regex
import
escape
as
regex_escape
from
regex
import
escape
as
regex_escape
from
vllm.model_executor.guided_decoding.outlines_logits_processors
import
(
OutlinesVocabulary
,
get_cache
,
get_vocabulary
)
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.utils
import
LazyLoader
from
vllm.utils
import
LazyLoader
from
vllm.v1.structured_output.backend_types
import
(
StructuredOutputBackend
,
from
vllm.v1.structured_output.backend_types
import
(
StructuredOutputBackend
,
StructuredOutputGrammar
,
StructuredOutputGrammar
,
StructuredOutputOptions
)
StructuredOutputOptions
)
from
vllm.v1.structured_output.utils
import
(
OutlinesVocabulary
,
get_outlines_cache
,
get_outlines_vocabulary
)
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
import
outlines_core
as
oc
import
outlines_core
as
oc
...
@@ -47,8 +48,8 @@ else:
...
@@ -47,8 +48,8 @@ else:
class
OutlinesBackend
(
StructuredOutputBackend
):
class
OutlinesBackend
(
StructuredOutputBackend
):
def
__post_init__
(
self
):
def
__post_init__
(
self
):
self
.
vocabulary
=
get_vocabulary
(
self
.
tokenizer
)
self
.
vocabulary
=
get_
outlines_
vocabulary
(
self
.
tokenizer
)
self
.
cache
=
get_cache
()
self
.
cache
=
get_
outlines_
cache
()
def
_compile_index
(
self
,
regex_string
:
str
,
def
_compile_index
(
self
,
regex_string
:
str
,
vocabulary
:
OutlinesVocabulary
)
->
oc
.
Index
:
vocabulary
:
OutlinesVocabulary
)
->
oc
.
Index
:
...
...
vllm/v1/structured_output/utils.py
View file @
697ef765
...
@@ -3,7 +3,205 @@
...
@@ -3,7 +3,205 @@
from
__future__
import
annotations
from
__future__
import
annotations
import
hashlib
import
importlib.metadata
import
os
from
typing
import
TYPE_CHECKING
import
regex
as
re
import
regex
as
re
from
cachetools
import
LRUCache
from
diskcache
import
Cache
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.utils
import
LazyLoader
if
TYPE_CHECKING
:
import
outlines_core
as
oc
import
transformers.file_utils
as
file_utils
import
transformers.models.gpt2.tokenization_gpt2
as
tokenization_gpt2
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
else
:
oc
=
LazyLoader
(
"oc"
,
globals
(),
"outlines_core"
)
file_utils
=
LazyLoader
(
"file_utils"
,
globals
(),
"transformers.file_utils"
)
tokenization_gpt2
=
LazyLoader
(
"tokenization_gpt2"
,
globals
(),
"transformers.models.gpt2.tokenization_gpt2"
,
)
# Logger for this module, created through vLLM's `init_logger` with this
# module's name.
logger = init_logger(__name__)

# NOTE(review): `CACHE` is not read or assigned anywhere in the visible part
# of this module -- confirm whether other code relies on it before removing.
CACHE = None
class OutlinesVocabulary:
    """
    Wrapper class for `outlines_core.Vocabulary`,
    which allows us to store a hash with the vocabulary
    """

    def __init__(self, vocabulary: oc.Vocabulary) -> None:
        """Wrap `vocabulary` and precompute an integer hash for it.

        Args:
            vocabulary: The vocabulary object to wrap. Only its ``repr()``
                is read here.
        """
        # Actual vocabulary object
        self.inner = vocabulary
        # The hash is the SHA-256 digest of the vocabulary's repr, read as a
        # base-16 integer, so it is always non-negative. It is kept on the
        # wrapper so it can serve as a cache key.
        digest = hashlib.sha256(repr(vocabulary).encode('utf-8')).hexdigest()
        self._hash = int(digest, 16)
def get_outlines_cache_path() -> str:
    """Return the directory path to use for the outlines cache.

    The first rule that applies wins:

    1. ``OUTLINES_CACHE_DIR``, if set and non-empty, is returned as-is.
    2. ``<XDG_CACHE_HOME>/.cache/outlines`` if ``XDG_CACHE_HOME`` is set
       and non-empty.
    3. ``~/.cache/outlines`` if the expanded home directory exists and is
       not ``/``.
    4. ``<system temp dir>/.cache/outlines`` otherwise.

    This function only computes the path; it does not create the directory.

    Returns:
        The cache directory path as a string.
    """
    # OUTLINES_CACHE_DIR takes precedence
    outlines_cache_dir = os.getenv("OUTLINES_CACHE_DIR")
    if outlines_cache_dir:
        return outlines_cache_dir

    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
    if xdg_cache_home:
        return os.path.join(xdg_cache_home, ".cache", "outlines")

    # If homedir is "/", we may be inside a container, and thus writing to
    # root would be problematic, so we fallback to using a tempfile.
    # Also validate the path exists, since os.path.expanduser does
    # not guarantee existence.
    home_dir = os.path.expanduser("~")
    if os.path.isdir(home_dir) and home_dir != "/":
        # Default Unix fallback: ~/.cache/outlines
        return os.path.join(home_dir, ".cache", "outlines")

    import tempfile

    # home_dir may be / inside a docker container without existing user
    return os.path.join(tempfile.gettempdir(), ".cache", "outlines")
def get_outlines_cache():
    """Return the cache object used for index caching.

    When ``envs.VLLM_V1_USE_OUTLINES_CACHE`` is falsy, an in-memory
    ``LRUCache`` with ``maxsize=128`` is returned. Otherwise a ``Cache``
    rooted at ``get_outlines_cache_path()`` is opened with eviction
    disabled. It is cleared if its stored ``'__version__'`` entry differs
    from the installed ``outlines_core`` version, and that entry is then
    rewritten with the installed version.
    """
    index_cache_dir = get_outlines_cache_path()

    if not envs.VLLM_V1_USE_OUTLINES_CACHE:
        return LRUCache(maxsize=128)

    logger.warning("Enabling outlines cache. This is an unbounded on-disk "
                   "cache. It may consume a lot of disk space and should "
                   "not be used with untrusted clients.")
    disk_cache = Cache(index_cache_dir, eviction_policy="none", cull_limit=0)
    installed_version = importlib.metadata.version("outlines_core")

    # Entries written under a different outlines_core version are discarded.
    if disk_cache.get('__version__', None) != installed_version:
        disk_cache.clear()
    disk_cache.set('__version__', installed_version)
    return disk_cache
# Matches a token spelled exactly as `<0xXX>` with two uppercase hex digits;
# `_reduced_vocabulary` treats such tokens as Llama-style single-byte tokens.
re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$")

# Matches a string made of one or more U+FFFD replacement characters with at
# most six other characters on each side.
re_replacement_seq = re.compile(r"^.{0,6}�+.{0,6}$")
def _reduced_vocabulary(
    tokenizer: AnyTokenizer,
    eos_token_id: int,
) -> dict[bytes, list[int]]:
    """Create a map from vocabulary tokens to lists of equivalent token ids.

    Special tokens, tokens whose decoded string is empty, and the token
    whose id equals `eos_token_id` are left out of the result.

    Args:
        tokenizer: Tokenizer providing `get_vocab()`, `all_special_tokens`
            and `convert_tokens_to_string()`.
        eos_token_id: Id of the end-of-sequence token to exclude.

    Returns:
        A dict of token bytes -> equivalent token ids.

    Raises:
        RuntimeError: If a token containing an invalid UTF-8 sequence has a
            character with no entry in the GPT-2 byte mapping.
    """
    unicode_to_bytes = {
        v: k
        for k, v in tokenization_gpt2.bytes_to_unicode().items()
    }
    # Read once and stored as a set: the original re-read the attribute and
    # scanned it linearly for every vocabulary entry.
    special_tokens = frozenset(
        tokenizer.all_special_tokens)  # type: ignore

    def convert_token_to_string(token: str) -> str:
        """Decode one token, restoring the leading space where needed."""
        string = tokenizer.convert_tokens_to_string([token])

        # A hack to handle missing spaces to HF's Llama tokenizers
        if ((type(token) is str
             and token.startswith(file_utils.SPIECE_UNDERLINE))
                or token == "<0x20>"):
            return " " + string

        return string

    vocabulary: dict[bytes, list[int]] = {}
    for token, token_idx in tokenizer.get_vocab().items():
        if token in special_tokens:
            continue

        token_str = convert_token_to_string(token)
        if not token_str:
            # Tokens that decode to nothing have no byte representation.
            continue

        if isinstance(token, (bytes, bytearray)):
            # For BPE tokenizers where tokens are stored as bytes.

            # safe to ignore since token_str is of type (bytearray, bytes)
            # by this point.
            token_bytes = bytes(token_str)  # type: ignore[arg-type]

        elif "\ufffd" in token_str and not re_replacement_seq.match(
                token_str):
            # Handle tokens with invalid UTF-8 sequences.
            if re_llama_byte_token.match(token):
                # Llama-like tokenizers use <0xXX> for incomplete sequences.
                token_bytes = bytes([int(token[3:5], 16)])
            else:
                # GPT2 tokenizers: map each byte back using unicode_to_bytes
                byte_vals = [unicode_to_bytes.get(c) for c in token]
                if None in byte_vals:
                    raise RuntimeError(
                        f"Cannot convert token `{token}`"
                        f" ({token_idx}) to bytes: {token_str}")
                # safe to ignore, since if None in byte_vals,
                # an error is thrown.
                token_bytes = bytes(byte_vals)  # type: ignore[arg-type]
        else:
            token_bytes = token_str.encode('utf-8')

        if token_idx != eos_token_id:
            vocabulary.setdefault(token_bytes, []).append(token_idx)

    return vocabulary
def get_outlines_vocabulary(tokenizer: AnyTokenizer) -> OutlinesVocabulary:
    """Get the `OutlinesVocabulary` wrapper for a given tokenizer.

    The result is memoized on the tokenizer as `_outlines_vocabulary`, so
    later calls with the same tokenizer object return the stored value.

    Args:
        tokenizer: Tokenizer to build the vocabulary from. It must have a
            non-None `eos_token_id`.

    Returns:
        The `OutlinesVocabulary` wrapping an `oc.Vocabulary` built from the
        tokenizer's reduced vocabulary.

    Raises:
        ValueError: If the tokenizer has no usable `eos_token_id`, or if an
            `AttributeError` is raised while building the vocabulary.
    """
    if hasattr(tokenizer, "_outlines_vocabulary"):
        return tokenizer._outlines_vocabulary  # type: ignore

    try:
        # A missing attribute and an explicit None are both rejected.
        eos_token_id = getattr(tokenizer, "eos_token_id", None)
        if eos_token_id is None:
            raise ValueError(
                f"Error during structured outputs setup for outlines: Tokenizer ({type(tokenizer)}) has no `eos_token_id` property, but `eos_token_id` is required for structured outputs to work properly."  # noqa: E501
            )

        reduced_vocab = _reduced_vocabulary(
            tokenizer,
            eos_token_id  # type: ignore
        )
        vocabulary = OutlinesVocabulary(
            oc.Vocabulary(eos_token_id, reduced_vocab))
        tokenizer._outlines_vocabulary = vocabulary  # type: ignore

        return vocabulary
    except AttributeError as e:
        raise ValueError(f"Cannot get the vocabulary of the tokenizer "
                         f"({type(tokenizer)}). The tokenizer should have a "
                         "get_vocab method.") from e
def
grammar_is_likely_lark
(
grammar_str
:
str
)
->
bool
:
def
grammar_is_likely_lark
(
grammar_str
:
str
)
->
bool
:
...
@@ -77,7 +275,7 @@ def convert_lark_to_ebnf(grammar_str: str) -> str:
...
@@ -77,7 +275,7 @@ def convert_lark_to_ebnf(grammar_str: str) -> str:
raise
ValueError
(
raise
ValueError
(
f
"Mismatched quotes in
{
rule_name
}
on line
{
line_num
}
"
)
f
"Mismatched quotes in
{
rule_name
}
on line
{
line_num
}
"
)
def
extract_references
(
text
:
str
)
->
set
:
def
extract_references
(
text
:
str
)
->
set
[
str
]
:
"""Extract rule references from text."""
"""Extract rule references from text."""
# Remove quoted strings and special characters
# Remove quoted strings and special characters
text
=
re
.
sub
(
r
'"[^"]*"'
,
''
,
text
)
text
=
re
.
sub
(
r
'"[^"]*"'
,
''
,
text
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment