Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b879ecd6
Unverified
Commit
b879ecd6
authored
Aug 02, 2025
by
rongfu.leng
Committed by
GitHub
Aug 01, 2025
Browse files
[Bugfix] fix when skip tokenizer init (#21922)
Signed-off-by:
rongfu.leng
<
rongfu.leng@daocloud.io
>
parent
3f8e9521
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
2 deletions
+33
-2
tests/v1/engine/test_llm_engine.py
tests/v1/engine/test_llm_engine.py
+26
-0
vllm/v1/engine/processor.py
vllm/v1/engine/processor.py
+7
-2
No files found.
tests/v1/engine/test_llm_engine.py
View file @
b879ecd6
...
...
@@ -213,3 +213,29 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
assert
len
(
num_accepted_tokens_per_pos
)
==
1
assert
isinstance
(
num_accepted_tokens_per_pos
[
0
],
Vector
)
assert
len
(
num_accepted_tokens_per_pos
[
0
].
values
)
==
5
@pytest.mark.parametrize("model", ["meta-llama/Llama-3.2-1B-Instruct"])
def test_skip_tokenizer_initialization(model: str,
                                       monkeypatch: pytest.MonkeyPatch):
    """Exercise generation on an engine built with ``skip_tokenizer_init``.

    The engine is created with ``skip_tokenizer_init=True`` and then driven
    with sampling parameters that request prompt logprobs and detokenization.
    Two things are checked:

    * a plain text prompt raises ``ValueError`` whose message matches
      "cannot pass text prompts when";
    * a prompt given as token ids produces at least one completion whose
      ``text`` is the empty string and whose ``token_ids`` is non-empty.
    """
    monkeypatch.setenv("VLLM_USE_V1", "1")

    # Build the engine with tokenizer (and detokenizer) initialization
    # skipped; the generated output is expected to carry token ids only.
    engine = LLM(model=model, skip_tokenizer_init=True, enforce_eager=True)
    params = SamplingParams(prompt_logprobs=True, detokenize=True)

    # A raw string prompt is expected to be rejected.
    with pytest.raises(ValueError, match="cannot pass text prompts when"):
        engine.generate("abc", params)

    # A pre-tokenized prompt must still go through end to end.
    token_prompt = {"prompt_token_ids": [1, 2, 3]}
    request_outputs = engine.generate(token_prompt, sampling_params=params)
    assert len(request_outputs) > 0

    candidates = request_outputs[0].outputs
    assert len(candidates) > 0

    first_candidate = candidates[0]
    assert first_candidate.text == ""
    assert first_candidate.token_ids
vllm/v1/engine/processor.py
View file @
b879ecd6
...
...
@@ -89,6 +89,10 @@ class Processor:
return
if
not
params
.
allowed_token_ids
:
raise
ValueError
(
"allowed_token_ids is not None and empty!"
)
if
self
.
tokenizer
is
None
:
# When skip_tokenizer_init=True, we can't validate token IDs
# Skip validation and let the model handle invalid tokens
return
tokenizer
=
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
)
vocab_size
=
len
(
tokenizer
)
if
not
all
(
0
<=
tid
<
vocab_size
for
tid
in
params
.
allowed_token_ids
):
...
...
@@ -283,6 +287,7 @@ class Processor:
len
(
decoder_inputs
[
"prompt_token_ids"
]))
sampling_params
.
update_from_generation_config
(
self
.
generation_config_fields
,
eos_token_id
)
if
self
.
tokenizer
is
not
None
:
sampling_params
.
update_from_tokenizer
(
self
.
tokenizer
.
get_lora_tokenizer
(
lora_request
))
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment