Unverified commit 6a62cb82, authored by Robert Shaw and committed by GitHub
Browse files

[Bugfix] Fix Engine Failing After Invalid Request - AsyncEngineDeadError (#5963)


Co-authored-by: Robert Shaw <rshaw@neuralmagic>
parent 5d2a1a9c
...@@ -234,15 +234,22 @@ class ChatCompletionRequest(OpenAIBaseModel): ...@@ -234,15 +234,22 @@ class ChatCompletionRequest(OpenAIBaseModel):
logits_processors = None logits_processors = None
if self.logit_bias: if self.logit_bias:
logit_bias: Dict[int, float] = {}
try:
for token_id, bias in self.logit_bias.items():
# Convert token_id to integer before we add to LLMEngine
# Clamp the bias between -100 and 100 per OpenAI API spec
logit_bias[int(token_id)] = min(100, max(-100, bias))
except ValueError as exc:
raise ValueError(f"Found token_id `{token_id}` in logit_bias "
f"but token_id must be an integer or string "
f"representing an integer") from exc
def logit_bias_logits_processor( def logit_bias_logits_processor(
token_ids: List[int], token_ids: List[int],
logits: torch.Tensor) -> torch.Tensor: logits: torch.Tensor) -> torch.Tensor:
assert self.logit_bias is not None for token_id, bias in logit_bias.items():
for token_id, bias in self.logit_bias.items(): logits[token_id] += bias
# Clamp the bias between -100 and 100 per OpenAI API spec
bias = min(100, max(-100, bias))
logits[int(token_id)] += bias
return logits return logits
logits_processors = [logit_bias_logits_processor] logits_processors = [logit_bias_logits_processor]
...@@ -419,15 +426,22 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -419,15 +426,22 @@ class CompletionRequest(OpenAIBaseModel):
logits_processors = None logits_processors = None
if self.logit_bias: if self.logit_bias:
logit_bias: Dict[int, float] = {}
try:
for token_id, bias in self.logit_bias.items():
# Convert token_id to integer
# Clamp the bias between -100 and 100 per OpenAI API spec
logit_bias[int(token_id)] = min(100, max(-100, bias))
except ValueError as exc:
raise ValueError(f"Found token_id `{token_id}` in logit_bias "
f"but token_id must be an integer or string "
f"representing an integer") from exc
def logit_bias_logits_processor( def logit_bias_logits_processor(
token_ids: List[int], token_ids: List[int],
logits: torch.Tensor) -> torch.Tensor: logits: torch.Tensor) -> torch.Tensor:
assert self.logit_bias is not None for token_id, bias in logit_bias.items():
for token_id, bias in self.logit_bias.items(): logits[token_id] += bias
# Clamp the bias between -100 and 100 per OpenAI API spec
bias = min(100, max(-100, bias))
logits[int(token_id)] += bias
return logits return logits
logits_processors = [logit_bias_logits_processor] logits_processors = [logit_bias_logits_processor]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment