Unverified commit 138ff231, authored by Jue WANG and committed by GitHub
Browse files

Allow disabling batch decoding. (#11944)

parent 13fb8b54
......@@ -108,6 +108,7 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
)
self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss"
self.disable_tokenizer_batch_decode = server_args.disable_tokenizer_batch_decode
def event_loop(self):
"""The event loop that handles requests"""
......@@ -176,17 +177,39 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
)
surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])
# TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request
surr_texts = self.tokenizer.batch_decode(
surr_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
read_texts = self.tokenizer.batch_decode(
read_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
# TODO(lmzheng): better handle skip_special_tokens/spaces_between_special_tokens per request
if self.disable_tokenizer_batch_decode:
surr_texts = [
self.tokenizer.decode(
surr, skip_special_tokens=skip, spaces_between_special_tokens=space
)
for surr, skip, space in zip(
surr_ids,
recv_obj.skip_special_tokens,
recv_obj.spaces_between_special_tokens,
)
]
read_texts = [
self.tokenizer.decode(
read, skip_special_tokens=skip, spaces_between_special_tokens=space
)
for read, skip, space in zip(
read_ids,
recv_obj.skip_special_tokens,
recv_obj.spaces_between_special_tokens,
)
]
else:
surr_texts = self.tokenizer.batch_decode(
surr_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
read_texts = self.tokenizer.batch_decode(
read_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
# Incremental decoding
output_strs = []
......
......@@ -433,6 +433,7 @@ class ServerArgs:
enable_symm_mem: bool = False
disable_flashinfer_cutlass_moe_fp4_allgather: bool = False
enable_tokenizer_batch_encode: bool = False
disable_tokenizer_batch_decode: bool = False
disable_outlines_disk_cache: bool = False
disable_custom_all_reduce: bool = False
enable_mscclpp: bool = False
......@@ -2898,6 +2899,11 @@ class ServerArgs:
action="store_true",
help="Enable batch tokenization for improved performance when processing multiple text inputs. Do not use with image inputs, pre-tokenized input_ids, or input_embeds.",
)
parser.add_argument(
"--disable-tokenizer-batch-decode",
action="store_true",
help="Disable batch decoding when decoding multiple completions.",
)
parser.add_argument(
"--disable-outlines-disk-cache",
action="store_true",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment