Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
947939c7
Unverified
Commit
947939c7
authored
Mar 13, 2026
by
Biswa Panda
Committed by
GitHub
Mar 13, 2026
Browse files
fix: populate logprobs bytes and token fields in OpenAI-compatible responses (#6953)
parent
1d509252
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
136 additions
and
66 deletions
+136
-66
components/src/dynamo/vllm/handlers.py
components/src/dynamo/vllm/handlers.py
+15
-7
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+2
-0
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+44
-0
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+14
-10
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+8
-25
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+6
-24
tests/utils/payloads.py
tests/utils/payloads.py
+47
-0
No files found.
components/src/dynamo/vllm/handlers.py
View file @
947939c7
...
...
@@ -1107,7 +1107,7 @@ class BaseWorkerHandler(ABC):
@
staticmethod
def
_extract_logprobs
(
output
,
num_output_tokens_so_far
:
int
output
,
num_output_tokens_so_far
:
int
,
tokenizer
=
None
)
->
tuple
[
list
[
float
]
|
None
,
list
[
list
[
dict
]]
|
None
]:
"""
Extract logprobs from vLLM CompletionOutput for new tokens.
...
...
@@ -1115,6 +1115,8 @@ class BaseWorkerHandler(ABC):
Args:
output: vLLM CompletionOutput object
num_output_tokens_so_far: Number of tokens already processed
tokenizer: Optional tokenizer for decoding token IDs when
decoded_token is not populated by the engine
Returns:
Tuple of (log_probs, top_logprobs) in Dynamo's expected format:
...
...
@@ -1147,18 +1149,23 @@ class BaseWorkerHandler(ABC):
# Build top_logprobs list for this token position
token_top_logprobs
=
[]
for
tok_id
,
logprob_info
in
token_logprobs_dict
.
items
():
token_str
=
getattr
(
logprob_info
,
"decoded_token"
,
None
)
if
not
token_str
and
tokenizer
:
try
:
token_str
=
tokenizer
.
decode
([
tok_id
])
except
Exception
:
token_str
=
None
token_top_logprobs
.
append
(
{
"rank"
:
(
logprob_info
.
rank
if
hasattr
(
logprob_info
,
"rank"
)
else
0
),
"token_id"
:
tok_id
,
"token"
:
(
logprob_info
.
decoded_token
if
hasattr
(
logprob_info
,
"decoded_token"
)
else
None
),
"token"
:
token_str
,
"logprob"
:
float
(
logprob_info
.
logprob
),
"bytes"
:
(
list
(
token_str
.
encode
(
"utf-8"
))
if
token_str
else
None
),
}
)
top_logprobs
.
append
(
token_top_logprobs
)
...
...
@@ -1250,8 +1257,9 @@ class BaseWorkerHandler(ABC):
out
=
{
"token_ids"
:
output
.
token_ids
[
num_output_tokens_so_far
:]}
# Extract logprobs for new tokens if available
tokenizer
=
getattr
(
self
.
engine_client
,
"tokenizer"
,
None
)
log_probs
,
top_logprobs
=
self
.
_extract_logprobs
(
output
,
num_output_tokens_so_far
output
,
num_output_tokens_so_far
,
tokenizer
=
tokenizer
)
if
log_probs
is
not
None
:
out
[
"log_probs"
]
=
log_probs
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
947939c7
...
...
@@ -59,6 +59,8 @@ pub struct TopLogprob {
pub
token_id
:
TokenIdType
,
pub
token
:
TokenType
,
pub
logprob
:
f64
,
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
bytes
:
Option
<
Vec
<
u8
>>
,
}
/// Nested list of [`TopLogprob`] entries: the outer `Vec` has one element per
/// token and each inner `Vec` holds the top-logprob entries for that token.
pub
type
TopLogprobs
=
Vec
<
Vec
<
TopLogprob
>>
;
// num_tokens x top_logprobs
...
...
lib/llm/src/protocols/openai.rs
View file @
947939c7
...
...
@@ -9,6 +9,7 @@ use super::{
common
::{
self
,
OutputOptionsProvider
,
SamplingOptionsProvider
,
StopConditionsProvider
},
};
use
crate
::
protocols
::
openai
::
common_ext
::
CommonExtProvider
;
use
crate
::
types
::
TokenIdType
;
pub
mod
chat_completions
;
pub
mod
common_ext
;
...
...
@@ -211,6 +212,49 @@ impl<T: OpenAIOutputOptionsProvider> OutputOptionsProvider for T {
}
}
/// Produces the UTF-8 byte sequence of `token` for use in OpenAI logprobs responses.
///
/// An empty token (an unknown/unresolved token from the backend) yields `None`;
/// every other token yields `Some` with an owned copy of its bytes.
pub(crate) fn token_to_utf8_bytes(token: &str) -> Option<Vec<u8>> {
    match token {
        "" => None,
        text => Some(Vec::from(text.as_bytes())),
    }
}
/// Translates the backend's `TopLogprob` entries for one token position into the
/// OpenAI-compatible `TopLogprobs` entries, guaranteeing the selected token is listed.
///
/// For each backend entry:
/// - `token` is the entry's token text, or an empty string when the backend gave none;
/// - `logprob` is the backend value narrowed to `f32`;
/// - `bytes` is the backend-provided byte list when present, otherwise it is
///   derived from the token text via `token_to_utf8_bytes`.
///
/// If no backend entry carries `selected_token_id`, one extra entry built from
/// `selected_token` / `selected_logprob` is appended at the end.
pub(crate) fn convert_backend_top_logprobs(
    top_lps: &[common::llm_backend::TopLogprob],
    selected_token: &str,
    selected_token_id: TokenIdType,
    selected_logprob: f32,
) -> Vec<dynamo_async_openai::types::TopLogprobs> {
    let mut entries: Vec<dynamo_async_openai::types::TopLogprobs> = Vec::new();
    let mut selected_seen = false;

    for candidate in top_lps {
        if candidate.token_id == selected_token_id {
            selected_seen = true;
        }

        let text = candidate.token.clone().unwrap_or_default();
        // Prefer bytes supplied by the backend; fall back to encoding the token text.
        let bytes = match candidate.bytes.clone() {
            Some(raw) => Some(raw),
            None => token_to_utf8_bytes(&text),
        };

        entries.push(dynamo_async_openai::types::TopLogprobs {
            token: text,
            logprob: candidate.logprob as f32,
            bytes,
        });
    }

    if selected_seen {
        return entries;
    }

    // The sampled token was not among the backend's candidates, so add it last.
    entries.push(dynamo_async_openai::types::TopLogprobs {
        token: selected_token.to_string(),
        logprob: selected_logprob,
        bytes: token_to_utf8_bytes(selected_token),
    });
    entries
}
pub
trait
DeltaGeneratorExt
<
ResponseType
:
Send
+
'static
+
std
::
fmt
::
Debug
>
:
Send
+
'static
{
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
947939c7
...
...
@@ -381,6 +381,7 @@ impl ChatCompletionAggregator for dynamo_async_openai::types::CreateChatCompleti
mod
tests
{
use
super
::
*
;
use
crate
::
protocols
::
openai
::
token_to_utf8_bytes
;
use
futures
::
stream
;
#[allow(deprecated)]
...
...
@@ -421,16 +422,19 @@ mod tests {
refusal
:
None
,
reasoning_content
:
None
,
};
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_async_openai
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
text
.to_string
(),
logprob
:
lp
,
bytes
:
None
,
top_logprobs
:
vec!
[],
},
]),
refusal
:
None
,
let
logprobs
=
logprob
.map
(|
lp
|
{
let
token
=
text
.to_string
();
dynamo_async_openai
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
token
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
&
token
),
top_logprobs
:
vec!
[],
},
]),
refusal
:
None
,
}
});
let
choice
=
dynamo_async_openai
::
types
::
ChatChoiceStream
{
index
,
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
947939c7
...
...
@@ -8,7 +8,11 @@ use crate::{
local_model
::
runtime_config
::
ModelRuntimeConfig
,
protocols
::{
common
::{
self
,
timing
::
RequestTracker
},
openai
::
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
openai
::{
convert_backend_top_logprobs
,
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
token_to_utf8_bytes
,
},
},
types
::
TokenIdType
,
};
...
...
@@ -211,33 +215,12 @@ impl DeltaGenerator {
.zip
(
tok_lps
)
.zip
(
top_logprobs
)
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
mut
found_selected_token
=
false
;
let
mut
converted_top_lps
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
dynamo_async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
});
}
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
top_logprobs
:
converted
_top_lps
,
bytes
:
token_to_utf8_bytes
(
t
)
,
top_logprobs
:
converted
,
}
})
.collect
()
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
947939c7
...
...
@@ -7,7 +7,10 @@ use super::{NvCreateCompletionRequest, NvCreateCompletionResponse};
use
crate
::{
protocols
::{
common
::{
self
,
timing
::
RequestTracker
},
openai
::
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
openai
::{
convert_backend_top_logprobs
,
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
},
},
types
::
TokenIdType
,
};
...
...
@@ -172,29 +175,8 @@ impl DeltaGenerator {
.zip
(
tok_lps
.iter
())
.zip
(
top_logprobs
.iter
())
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
mut
found_selected_token
=
false
;
let
mut
converted_top_lps
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
dynamo_async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
*
lp
,
bytes
:
None
,
});
}
serde_json
::
to_value
(
converted_top_lps
)
.unwrap
()
let
converted
=
convert_backend_top_logprobs
(
top_lps
,
t
,
*
tid
,
*
lp
);
serde_json
::
to_value
(
converted
)
.unwrap
()
})
.collect
()
});
...
...
tests/utils/payloads.py
View file @
947939c7
...
...
@@ -200,6 +200,33 @@ class ChatPayloadWithLogprobs(ChatPayload):
logprob_val
<=
0
),
f
"logprob should be <= 0, got
{
logprob_val
}
"
# Validate bytes field is populated for the selected token
assert
"bytes"
in
item
,
"Missing 'bytes' in logprobs content item"
token_str
=
item
[
"token"
]
if
token_str
:
assert
(
item
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for non-empty token
{
token_str
!
r
}
"
assert
isinstance
(
item
[
"bytes"
],
list
),
f
"'bytes' should be a list, got
{
type
(
item
[
'bytes'
])
}
"
# Validate top_logprobs entries have token, logprob, and bytes
for
top_lp
in
item
[
"top_logprobs"
]:
assert
(
"token"
in
top_lp
),
"Missing 'token' in top_logprobs entry"
assert
(
"logprob"
in
top_lp
),
"Missing 'logprob' in top_logprobs entry"
assert
(
"bytes"
in
top_lp
),
"Missing 'bytes' in top_logprobs entry"
if
top_lp
[
"token"
]:
assert
(
top_lp
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for top_logprob token
{
top_lp
[
'token'
]
!
r
}
"
logger
.
info
(
f
"✓ Logprobs validation passed: found
{
len
(
content_logprobs
)
}
tokens with logprobs"
)
...
...
@@ -482,6 +509,26 @@ class CompletionPayloadWithLogprobs(CompletionPayload):
logprob_val
<=
0
),
f
"logprob at index
{
i
}
should be <= 0, got
{
logprob_val
}
"
# Validate top_logprobs entries have token, logprob, and bytes when present
top_logprobs_list
=
logprobs_data
.
get
(
"top_logprobs"
,
[])
for
i
,
token_top_lps
in
enumerate
(
top_logprobs_list
):
if
not
token_top_lps
:
continue
for
top_lp
in
token_top_lps
:
assert
(
"token"
in
top_lp
),
f
"Missing 'token' in top_logprobs[
{
i
}
] entry"
assert
(
"logprob"
in
top_lp
),
f
"Missing 'logprob' in top_logprobs[
{
i
}
] entry"
assert
(
"bytes"
in
top_lp
),
f
"Missing 'bytes' in top_logprobs[
{
i
}
] entry"
if
top_lp
[
"token"
]:
assert
(
top_lp
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for top_logprob token
{
top_lp
[
'token'
]
!
r
}
"
logger
.
info
(
f
"✓ Logprobs validation passed: found
{
len
(
token_logprobs
)
}
tokens with logprobs"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment