Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e1bb2fd5
Unverified
Commit
e1bb2fd5
authored
Apr 18, 2024
by
James Whedbee
Committed by
GitHub
Apr 18, 2024
Browse files
[Bugfix] Support logprobs when using guided_json and other constrained decoding fields (#4149)
parent
705578ae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
1 deletion
+33
-1
tests/entrypoints/test_openai_server.py
tests/entrypoints/test_openai_server.py
+30
-0
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+3
-1
No files found.
tests/entrypoints/test_openai_server.py
View file @
e1bb2fd5
...
@@ -723,6 +723,36 @@ async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI,
...
@@ -723,6 +723,36 @@ async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_json
=
TEST_SCHEMA
))
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_json
=
TEST_SCHEMA
))
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_choice_chat_logprobs
(
server
,
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
5
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
top_logprobs
=
chat_completion
.
choices
[
0
].
logprobs
.
top_logprobs
# -9999.0 is the minimum logprob returned by OpenAI
assert
all
(
isinstance
(
logprob
,
float
)
and
logprob
>=
-
9999.0
for
token_dict
in
top_logprobs
for
token
,
logprob
in
token_dict
.
items
())
async
def
test_response_format_json_object
(
server
,
client
:
openai
.
AsyncOpenAI
):
async
def
test_response_format_json_object
(
server
,
client
:
openai
.
AsyncOpenAI
):
resp
=
await
client
.
chat
.
completions
.
create
(
resp
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
model
=
MODEL_NAME
,
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
e1bb2fd5
...
@@ -116,7 +116,9 @@ class OpenAIServing:
...
@@ -116,7 +116,9 @@ class OpenAIServing:
if
num_output_top_logprobs
:
if
num_output_top_logprobs
:
logprobs
.
top_logprobs
.
append
({
logprobs
.
top_logprobs
.
append
({
p
.
decoded_token
:
p
.
logprob
# Convert float("-inf") to the
# JSON-serializable float that OpenAI uses
p
.
decoded_token
:
max
(
p
.
logprob
,
-
9999.0
)
for
i
,
p
in
step_top_logprobs
.
items
()
for
i
,
p
in
step_top_logprobs
.
items
()
}
if
step_top_logprobs
else
None
)
}
if
step_top_logprobs
else
None
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment