Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
031a7995
Unverified
Commit
031a7995
authored
Oct 31, 2024
by
Joe Runde
Committed by
GitHub
Nov 01, 2024
Browse files
[Bugfix][Frontend] Reject guided decoding in multistep mode (#9892)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
parent
b63c64d9
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
3 deletions
+30
-3
docs/source/serving/compatibility_matrix.rst
docs/source/serving/compatibility_matrix.rst
+1
-1
tests/entrypoints/openai/test_prompt_validation.py
tests/entrypoints/openai/test_prompt_validation.py
+20
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+7
-0
vllm/sampling_params.py
vllm/sampling_params.py
+2
-2
No files found.
docs/source/serving/compatibility_matrix.rst
View file @
031a7995
...
@@ -283,7 +283,7 @@ Feature x Feature
...
@@ -283,7 +283,7 @@ Feature x Feature
- ✅
- ✅
- ✅
- ✅
- ✅
- ✅
- `✗ <https://github.com/vllm-project/vllm/issues/8985>`__
- `✗ <https://github.com/vllm-project/vllm/issues/9893>`__
- ?
- ?
- ✅
- ✅
- ✅
- ✅
...
...
tests/entrypoints/openai/test_prompt_validation.py
View file @
031a7995
...
@@ -35,3 +35,23 @@ async def test_out_of_vocab_token_ids():
...
@@ -35,3 +35,23 @@ async def test_out_of_vocab_token_ids():
prompt
=
[
999999
],
prompt
=
[
999999
],
max_tokens
=
5
,
max_tokens
=
5
,
temperature
=
0.0
)
temperature
=
0.0
)
@pytest.mark.asyncio
async def test_reject_multistep_with_guided_decoding():
    """Check that a multi-step server answers a JSON-mode request with HTTP 400.

    The server is launched with ``--num-scheduler-steps 8`` and a completion
    is requested with ``response_format={"type": "json_object"}``. The call
    must raise ``openai.BadRequestError`` whose message mentions both guided
    decoding and multi-step decoding.
    """
    model_name = "gpt2"
    # A scheduler step count above 1 is what the engine treats as multi-step.
    server_args = ["--enforce-eager", "--num-scheduler-steps", "8"]

    # Expected wording of the rejection returned by the server.
    expected_error = re.compile('.*Guided decoding .* multi-step decoding.*')
    # Presumably JSON mode is what triggers guided decoding on the server
    # side -- confirm against the frontend's request handling.
    json_mode_body = {"response_format": {"type": "json_object"}}

    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()

        with pytest.raises(openai.BadRequestError, match=expected_error):
            await client.completions.create(
                model=model_name,
                prompt="Hello",
                max_tokens=5,
                temperature=0.0,
                extra_body=json_mode_body,
            )
vllm/engine/llm_engine.py
View file @
031a7995
...
@@ -829,6 +829,13 @@ class LLMEngine:
...
@@ -829,6 +829,13 @@ class LLMEngine:
raise
ValueError
(
f
"Got priority
{
priority
}
but "
raise
ValueError
(
f
"Got priority
{
priority
}
but "
"Priority scheduling is not enabled."
)
"Priority scheduling is not enabled."
)
if
isinstance
(
params
,
SamplingParams
)
\
and
(
params
.
guided_decoding
or
params
.
logits_processors
)
\
and
self
.
scheduler_config
.
num_scheduler_steps
>
1
:
raise
ValueError
(
"Guided decoding and logits processors are not supported "
"in multi-step decoding"
)
if
arrival_time
is
None
:
if
arrival_time
is
None
:
arrival_time
=
time
.
time
()
arrival_time
=
time
.
time
()
...
...
vllm/sampling_params.py
View file @
031a7995
...
@@ -485,8 +485,8 @@ class SamplingParams(
...
@@ -485,8 +485,8 @@ class SamplingParams(
f
"skip_special_tokens=
{
self
.
skip_special_tokens
}
, "
f
"skip_special_tokens=
{
self
.
skip_special_tokens
}
, "
"spaces_between_special_tokens="
"spaces_between_special_tokens="
f
"
{
self
.
spaces_between_special_tokens
}
, "
f
"
{
self
.
spaces_between_special_tokens
}
, "
f
"truncate_prompt_tokens=
{
self
.
truncate_prompt_tokens
}
)
, "
f
"truncate_prompt_tokens=
{
self
.
truncate_prompt_tokens
}
, "
f
"guided_decoding=
{
self
.
guided_decoding
}
"
)
f
"guided_decoding=
{
self
.
guided_decoding
}
)
"
)
class
BeamSearchParams
(
class
BeamSearchParams
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment