Unverified commit d3c8180a, authored by Jack Gordley, committed by GitHub
Browse files

[Bugfix] Fixing max token error message for openai compatible server (#4016)

parent 62b8aebc
@@ -206,6 +206,12 @@ class OpenAIServing:
token_num = len(input_ids) token_num = len(input_ids)
if request.max_tokens is None: if request.max_tokens is None:
if token_num >= self.max_model_len:
raise ValueError(
f"This model's maximum context length is "
f"{self.max_model_len} tokens. However, you requested "
f"{token_num} tokens in the messages, "
f"Please reduce the length of the messages.", )
request.max_tokens = self.max_model_len - token_num request.max_tokens = self.max_model_len - token_num
if token_num + request.max_tokens > self.max_model_len: if token_num + request.max_tokens > self.max_model_len:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment