Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
947939c7
"lib/llm/vscode:/vscode.git/clone" did not exist on "f4f827620de9cebb2038581a1ae7dabf01173142"
Unverified
Commit
947939c7
authored
Mar 13, 2026
by
Biswa Panda
Committed by
GitHub
Mar 13, 2026
Browse files
fix: populate logprobs bytes and token fields in OpenAI-compatible responses (#6953)
parent
1d509252
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
136 additions
and
66 deletions
+136
-66
components/src/dynamo/vllm/handlers.py
components/src/dynamo/vllm/handlers.py
+15
-7
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+2
-0
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+44
-0
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+14
-10
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+8
-25
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+6
-24
tests/utils/payloads.py
tests/utils/payloads.py
+47
-0
No files found.
components/src/dynamo/vllm/handlers.py
View file @
947939c7
...
...
@@ -1107,7 +1107,7 @@ class BaseWorkerHandler(ABC):
@
staticmethod
def
_extract_logprobs
(
output
,
num_output_tokens_so_far
:
int
output
,
num_output_tokens_so_far
:
int
,
tokenizer
=
None
)
->
tuple
[
list
[
float
]
|
None
,
list
[
list
[
dict
]]
|
None
]:
"""
Extract logprobs from vLLM CompletionOutput for new tokens.
...
...
@@ -1115,6 +1115,8 @@ class BaseWorkerHandler(ABC):
Args:
output: vLLM CompletionOutput object
num_output_tokens_so_far: Number of tokens already processed
tokenizer: Optional tokenizer for decoding token IDs when
decoded_token is not populated by the engine
Returns:
Tuple of (log_probs, top_logprobs) in Dynamo's expected format:
...
...
@@ -1147,18 +1149,23 @@ class BaseWorkerHandler(ABC):
# Build top_logprobs list for this token position
token_top_logprobs
=
[]
for
tok_id
,
logprob_info
in
token_logprobs_dict
.
items
():
token_str
=
getattr
(
logprob_info
,
"decoded_token"
,
None
)
if
not
token_str
and
tokenizer
:
try
:
token_str
=
tokenizer
.
decode
([
tok_id
])
except
Exception
:
token_str
=
None
token_top_logprobs
.
append
(
{
"rank"
:
(
logprob_info
.
rank
if
hasattr
(
logprob_info
,
"rank"
)
else
0
),
"token_id"
:
tok_id
,
"token"
:
(
logprob_info
.
decoded_token
if
hasattr
(
logprob_info
,
"decoded_token"
)
else
None
),
"token"
:
token_str
,
"logprob"
:
float
(
logprob_info
.
logprob
),
"bytes"
:
(
list
(
token_str
.
encode
(
"utf-8"
))
if
token_str
else
None
),
}
)
top_logprobs
.
append
(
token_top_logprobs
)
...
...
@@ -1250,8 +1257,9 @@ class BaseWorkerHandler(ABC):
out
=
{
"token_ids"
:
output
.
token_ids
[
num_output_tokens_so_far
:]}
# Extract logprobs for new tokens if available
tokenizer
=
getattr
(
self
.
engine_client
,
"tokenizer"
,
None
)
log_probs
,
top_logprobs
=
self
.
_extract_logprobs
(
output
,
num_output_tokens_so_far
output
,
num_output_tokens_so_far
,
tokenizer
=
tokenizer
)
if
log_probs
is
not
None
:
out
[
"log_probs"
]
=
log_probs
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
947939c7
...
...
@@ -59,6 +59,8 @@ pub struct TopLogprob {
pub
token_id
:
TokenIdType
,
pub
token
:
TokenType
,
pub
logprob
:
f64
,
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
bytes
:
Option
<
Vec
<
u8
>>
,
}
pub
type
TopLogprobs
=
Vec
<
Vec
<
TopLogprob
>>
;
// num_tokens x top_logprobs
...
...
lib/llm/src/protocols/openai.rs
View file @
947939c7
...
...
@@ -9,6 +9,7 @@ use super::{
common
::{
self
,
OutputOptionsProvider
,
SamplingOptionsProvider
,
StopConditionsProvider
},
};
use
crate
::
protocols
::
openai
::
common_ext
::
CommonExtProvider
;
use
crate
::
types
::
TokenIdType
;
pub
mod
chat_completions
;
pub
mod
common_ext
;
...
...
@@ -211,6 +212,49 @@ impl<T: OpenAIOutputOptionsProvider> OutputOptionsProvider for T {
}
}
/// Returns the UTF-8 bytes of `token` for the `bytes` field of OpenAI logprobs responses.
///
/// An empty token (an unknown/unresolved token from the backend) yields `None`;
/// any other token yields `Some` holding an owned copy of its UTF-8 bytes.
pub(crate) fn token_to_utf8_bytes(token: &str) -> Option<Vec<u8>> {
    match token {
        // Nothing to encode for an empty token.
        "" => None,
        text => Some(text.as_bytes().to_vec()),
    }
}
/// Builds the OpenAI-compatible top-logprobs list for one token position from the
/// backend's `TopLogprob` entries, guaranteeing the selected token is represented.
///
/// Every backend entry produces one output entry, in the same order:
/// - `token` is the backend token text, or the default (empty) string when absent;
/// - `logprob` is the backend value narrowed to `f32`;
/// - `bytes` is the backend-provided byte list when present, otherwise it is derived
///   from the token text via `token_to_utf8_bytes`.
///
/// If no backend entry carries `selected_token_id`, one extra entry built from
/// `selected_token` / `selected_logprob` is appended at the end of the list.
pub(crate) fn convert_backend_top_logprobs(
    top_lps: &[common::llm_backend::TopLogprob],
    selected_token: &str,
    selected_token_id: TokenIdType,
    selected_logprob: f32,
) -> Vec<dynamo_async_openai::types::TopLogprobs> {
    let mut converted: Vec<dynamo_async_openai::types::TopLogprobs> = Vec::new();
    let mut selected_seen = false;

    for entry in top_lps {
        if entry.token_id == selected_token_id {
            selected_seen = true;
        }

        let text = entry.token.clone().unwrap_or_default();
        // Prefer bytes supplied by the backend; fall back to encoding the token text.
        let bytes = match entry.bytes.clone() {
            Some(raw) => Some(raw),
            None => token_to_utf8_bytes(&text),
        };

        converted.push(dynamo_async_openai::types::TopLogprobs {
            token: text,
            logprob: entry.logprob as f32,
            bytes,
        });
    }

    if selected_seen {
        return converted;
    }

    // The sampled token was not among the backend's top entries: append it.
    converted.push(dynamo_async_openai::types::TopLogprobs {
        token: selected_token.to_string(),
        logprob: selected_logprob,
        bytes: token_to_utf8_bytes(selected_token),
    });
    converted
}
pub
trait
DeltaGeneratorExt
<
ResponseType
:
Send
+
'static
+
std
::
fmt
::
Debug
>
:
Send
+
'static
{
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
947939c7
...
...
@@ -381,6 +381,7 @@ impl ChatCompletionAggregator for dynamo_async_openai::types::CreateChatCompleti
mod
tests
{
use
super
::
*
;
use
crate
::
protocols
::
openai
::
token_to_utf8_bytes
;
use
futures
::
stream
;
#[allow(deprecated)]
...
...
@@ -421,16 +422,19 @@ mod tests {
refusal
:
None
,
reasoning_content
:
None
,
};
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_async_openai
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
text
.to_string
(),
logprob
:
lp
,
bytes
:
None
,
top_logprobs
:
vec!
[],
},
]),
refusal
:
None
,
let
logprobs
=
logprob
.map
(|
lp
|
{
let
token
=
text
.to_string
();
dynamo_async_openai
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
token
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
&
token
),
top_logprobs
:
vec!
[],
},
]),
refusal
:
None
,
}
});
let
choice
=
dynamo_async_openai
::
types
::
ChatChoiceStream
{
index
,
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
947939c7
...
...
@@ -8,7 +8,11 @@ use crate::{
local_model
::
runtime_config
::
ModelRuntimeConfig
,
protocols
::{
common
::{
self
,
timing
::
RequestTracker
},
openai
::
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
openai
::{
convert_backend_top_logprobs
,
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
token_to_utf8_bytes
,
},
},
types
::
TokenIdType
,
};
...
...
@@ -211,33 +215,12 @@ impl DeltaGenerator {
.zip
(
tok_lps
)
.zip
(
top_logprobs
)
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
mut
found_selected_token
=
false
;
let
mut
converted_top_lps
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
dynamo_async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
});
}
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
top_logprobs
:
converted
_top_lps
,
bytes
:
token_to_utf8_bytes
(
t
)
,
top_logprobs
:
converted
,
}
})
.collect
()
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
947939c7
...
...
@@ -7,7 +7,10 @@ use super::{NvCreateCompletionRequest, NvCreateCompletionResponse};
use
crate
::{
protocols
::{
common
::{
self
,
timing
::
RequestTracker
},
openai
::
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
openai
::{
convert_backend_top_logprobs
,
nvext
::{
NvExtProvider
,
NvExtResponse
,
TimingInfo
},
},
},
types
::
TokenIdType
,
};
...
...
@@ -172,29 +175,8 @@ impl DeltaGenerator {
.zip
(
tok_lps
.iter
())
.zip
(
top_logprobs
.iter
())
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
mut
found_selected_token
=
false
;
let
mut
converted_top_lps
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
dynamo_async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
dynamo_async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
*
lp
,
bytes
:
None
,
});
}
serde_json
::
to_value
(
converted_top_lps
)
.unwrap
()
let
converted
=
convert_backend_top_logprobs
(
top_lps
,
t
,
*
tid
,
*
lp
);
serde_json
::
to_value
(
converted
)
.unwrap
()
})
.collect
()
});
...
...
tests/utils/payloads.py
View file @
947939c7
...
...
@@ -200,6 +200,33 @@ class ChatPayloadWithLogprobs(ChatPayload):
logprob_val
<=
0
),
f
"logprob should be <= 0, got
{
logprob_val
}
"
# Validate bytes field is populated for the selected token
assert
"bytes"
in
item
,
"Missing 'bytes' in logprobs content item"
token_str
=
item
[
"token"
]
if
token_str
:
assert
(
item
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for non-empty token
{
token_str
!
r
}
"
assert
isinstance
(
item
[
"bytes"
],
list
),
f
"'bytes' should be a list, got
{
type
(
item
[
'bytes'
])
}
"
# Validate top_logprobs entries have token, logprob, and bytes
for
top_lp
in
item
[
"top_logprobs"
]:
assert
(
"token"
in
top_lp
),
"Missing 'token' in top_logprobs entry"
assert
(
"logprob"
in
top_lp
),
"Missing 'logprob' in top_logprobs entry"
assert
(
"bytes"
in
top_lp
),
"Missing 'bytes' in top_logprobs entry"
if
top_lp
[
"token"
]:
assert
(
top_lp
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for top_logprob token
{
top_lp
[
'token'
]
!
r
}
"
logger
.
info
(
f
"✓ Logprobs validation passed: found
{
len
(
content_logprobs
)
}
tokens with logprobs"
)
...
...
@@ -482,6 +509,26 @@ class CompletionPayloadWithLogprobs(CompletionPayload):
logprob_val
<=
0
),
f
"logprob at index
{
i
}
should be <= 0, got
{
logprob_val
}
"
# Validate top_logprobs entries have token, logprob, and bytes when present
top_logprobs_list
=
logprobs_data
.
get
(
"top_logprobs"
,
[])
for
i
,
token_top_lps
in
enumerate
(
top_logprobs_list
):
if
not
token_top_lps
:
continue
for
top_lp
in
token_top_lps
:
assert
(
"token"
in
top_lp
),
f
"Missing 'token' in top_logprobs[
{
i
}
] entry"
assert
(
"logprob"
in
top_lp
),
f
"Missing 'logprob' in top_logprobs[
{
i
}
] entry"
assert
(
"bytes"
in
top_lp
),
f
"Missing 'bytes' in top_logprobs[
{
i
}
] entry"
if
top_lp
[
"token"
]:
assert
(
top_lp
[
"bytes"
]
is
not
None
),
f
"'bytes' should be populated for top_logprob token
{
top_lp
[
'token'
]
!
r
}
"
logger
.
info
(
f
"✓ Logprobs validation passed: found
{
len
(
token_logprobs
)
}
tokens with logprobs"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment