Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ae2e93f8
Unverified
Commit
ae2e93f8
authored
Feb 06, 2026
by
Sumanth R Hegde
Committed by
GitHub
Feb 06, 2026
Browse files
[Fix] Fix `logprobs=0` handling for `/inference/v1/generate` endpoint (#34010)
Signed-off-by:
SumanthRH
<
sumanthrh99@gmail.com
>
parent
9e9acce5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
2 deletions
+29
-2
tests/entrypoints/openai/test_serving_tokens.py
tests/entrypoints/openai/test_serving_tokens.py
+26
-0
vllm/entrypoints/serve/disagg/serving.py
vllm/entrypoints/serve/disagg/serving.py
+3
-2
No files found.
tests/entrypoints/openai/test_serving_tokens.py
View file @
ae2e93f8
...
...
@@ -87,6 +87,32 @@ async def test_generate_endpoint(client):
assert
"choices"
in
data
@pytest.mark.asyncio
@pytest.mark.parametrize("logprobs_value", [0, 1, 5])
async def test_generate_logprobs(client, logprobs_value):
    """Check the logprobs returned by the generate endpoint.

    The request is sent for ``logprobs`` values of 0, 1 and 5. For each
    one the first choice must carry a non-null ``logprobs`` object whose
    ``content`` has one entry per returned token id, and every entry must
    expose a ``logprob`` plus ``max(logprobs_value, 1)`` top logprobs
    (so ``logprobs=0`` still yields a single entry).
    """
    sampling = {
        "max_tokens": 5,
        "temperature": 0.0,
        "logprobs": logprobs_value,
    }
    request_body = {
        "model": MODEL_NAME,
        "token_ids": [1, 2, 3],
        "sampling_params": sampling,
        "stream": False,
    }

    response = await client.post(GEN_ENDPOINT, json=request_body)
    response.raise_for_status()
    first_choice = response.json()["choices"][0]

    assert first_choice["logprobs"] is not None
    per_token = first_choice["logprobs"]["content"]
    assert len(per_token) == len(first_choice["token_ids"])

    # Even when 0 top logprobs are requested, one entry is expected.
    expected_top = max(logprobs_value, 1)
    for token_entry in per_token:
        assert "logprob" in token_entry
        top_count = len(token_entry["top_logprobs"])
        assert top_count >= 1
        assert top_count == expected_top
@
pytest
.
mark
.
asyncio
async
def
test_same_response_as_chat_completions
(
client
,
tokenizer
,
messages
):
token_ids
=
tokenizer
.
apply_chat_template
(
...
...
vllm/entrypoints/serve/disagg/serving.py
View file @
ae2e93f8
...
...
@@ -184,7 +184,7 @@ class ServingTokens(OpenAIServing):
out_logprobs
=
output
.
logprobs
# This is top_logprobs in completions API
if
sampling_params
.
logprobs
:
if
sampling_params
.
logprobs
is
not
None
:
assert
out_logprobs
is
not
None
,
"Did not output logprobs"
logprobs
=
self
.
_create_tokens_logprobs
(
token_ids
=
token_ids
,
...
...
@@ -284,7 +284,8 @@ class ServingTokens(OpenAIServing):
logprob
=
max
(
p
[
1
].
logprob
,
-
9999.0
),
)
for
i
,
p
in
enumerate
(
step_top_logprobs
.
items
())
if
num_output_top_logprobs
and
i
<
num_output_top_logprobs
if
num_output_top_logprobs
is
not
None
and
i
<
max
(
num_output_top_logprobs
,
1
)
],
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment