Unverified Commit 7275d496 authored by Ryan McCormick's avatar Ryan McCormick Committed by GitHub
Browse files

feat: Add ignore_eos/nvext support for legacy completions (#1080)

parent b813befa
...@@ -174,5 +174,18 @@ class Processor(ChatProcessorMixin): ...@@ -174,5 +174,18 @@ class Processor(ChatProcessorMixin):
@dynamo_endpoint(name="completions")
async def completions(self, raw_request: DynamoTRTLLMCompletionRequest):
    """Serve a legacy completions request, streaming back generated responses.

    Before generation, options carried in the request's ``nvext`` mapping are
    copied onto the request itself:

    * ``ignore_eos`` is read from ``nvext`` because that is where it is
      passed through.
    * When ``ignore_eos`` is truthy, ``min_tokens`` is set to ``max_tokens``
      to guarantee the full expected output sequence length for consistent
      benchmarking. ``min_tokens`` isn't currently propagated through the
      Rust OpenAI HTTP frontend, so it is set here.

    Args:
        raw_request: The incoming completion request. It is mutated in place
            (``ignore_eos`` and ``min_tokens``) before being forwarded.

    Yields:
        Each response produced by ``self._generate`` for this request.
    """
    nvext = raw_request.nvext
    if nvext:
        ignore_eos = nvext.get("ignore_eos")
        # Only override the request's value when nvext actually carries one;
        # an nvext payload without the key must not reset ignore_eos to None.
        if ignore_eos is not None:
            raw_request.ignore_eos = ignore_eos
        # If ignore_eos is True, set min_tokens to max_tokens to guarantee
        # the full expected OSL for consistent benchmarking purposes.
        if ignore_eos:
            logger.debug(
                f"[preprocessor] `ignore_eos` detected, setting `min_tokens` to `max_tokens`: {raw_request.max_tokens}"
            )
            raw_request.min_tokens = raw_request.max_tokens

    async for response in self._generate(raw_request, RequestType.COMPLETION):
        yield response
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment