Commit af5ace66 (unverified), authored by William Arnold, committed by GitHub
Browse files

fix: ignore empty non-final stream chunks in decode handler (#6304)


Signed-off-by: William Arnold <warnold@nvidia.com>
parent d86937f9
@@ -235,11 +235,12 @@ class DecodeWorkerHandler(BaseWorkerHandler):
 # With stream_output=True, output_ids contains only new tokens (disjoint)
 output_ids = res.get("output_ids", [])
-# If request is not finished yet, but there are no outputs, return an error.
+# Empty, non-final chunks can happen during scheduler idle ticks.
+# Keep waiting for the next chunk unless cancellation was requested.
 if not output_ids and not finish_reason:
-    if not context.is_stopped():
-        yield {"finish_reason": "error", "token_ids": []}
-    break
+    if context.is_stopped():
+        break
+    continue
 # Pass through disjoint token segments directly
 out["token_ids"] = output_ids
......
@@ -154,14 +154,21 @@ class StreamProcessor:
 async for res in stream_source:
     try:
         # With stream_output=True, output_ids contains only new tokens (disjoint)
+        output_ids = res.get("output_ids", [])
+        finish_reason = res.get("meta_info", {}).get("finish_reason")
+        # Empty, non-final chunks can happen during scheduler idle ticks.
+        # Keep waiting for the next chunk.
+        if not output_ids and not finish_reason:
+            continue
         output = {
-            "token_ids": res["output_ids"],
+            "token_ids": output_ids,
             "text": res.get("text", ""),
             "finished": False,
         }
         # Check for finish reason
-        finish_reason = res.get("meta_info", {}).get("finish_reason")
         if finish_reason:
             output.update(
                 {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment