f"logprob_start_len, ({req.logprob_start_len}) is higher than the number of input tokens ({len(req.origin_input_ids)}). Request with a lower logprob_start_len.",
HTTPStatus.BAD_REQUEST,
"BadRequestError",
)
error_msg=f"{req.logprob_start_len=} is higher than the number of input tokens {len(req.origin_input_ids)=}. Please use a smaller logprob_start_len."
req.logprob_start_len=len(req.origin_input_ids)-1
req.set_finish_with_abort(error_msg)
self._add_request_to_queue(req)
return
...
...
@@ -1061,6 +1055,10 @@ class Scheduler(
ifnotcache_hit:
req.grammar_key=key
add_to_grammar_queue=True
else:
ifvalueisINVALID_GRAMMAR_OBJ:# We hit a cached invalid grammar.
error_msg=f"Invalid grammar request with cache hit: {key=}"