Unverified commit 08a3763a, authored by ishandhanani and committed by GitHub
Browse files

fix: `skip-tokenizer-init` by default in sglang (#2595)

parent 57728909
......@@ -128,6 +128,13 @@ def parse_args(args: list[str]) -> Config:
server_args = ServerArgs.from_cli_args(parsed_args)
if not server_args.skip_tokenizer_init:
logging.warning(
"When using the dynamo frontend (python3 -m dynamo.frontend), we perform tokenization and detokenization "
"in the frontend. Automatically setting --skip-tokenizer-init to True."
)
server_args.skip_tokenizer_init = True
return Config(server_args, dynamo_args)
......
......@@ -102,7 +102,14 @@ class DecodeWorkerHandler(BaseWorkerHandler):
if finish_reason:
out = {"token_ids": [], "finish_reason": finish_reason["type"]}
else:
next_total_toks = len(res["output_ids"])
try:
next_total_toks = len(res["output_ids"])
except KeyError:
raise ValueError(
f"Missing 'output_ids' in response. This often happens when using skip_tokenizer_init=True. "
f"If you're using ModelType.CHAT or custom model configurations, you may need to modify "
f"the tokenization/detokenization logic in your handler. Response keys: {list(res.keys())}"
)
out = {"token_ids": res["output_ids"][num_output_tokens_so_far:]}
num_output_tokens_so_far = next_total_toks
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment