Unverified Commit 138ff231 authored by Jue WANG's avatar Jue WANG Committed by GitHub
Browse files

Allow disabling batch decoding. (#11944)

parent 13fb8b54
...@@ -108,6 +108,7 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin): ...@@ -108,6 +108,7 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
) )
self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss" self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss"
self.disable_tokenizer_batch_decode = server_args.disable_tokenizer_batch_decode
def event_loop(self): def event_loop(self):
"""The event loop that handles requests""" """The event loop that handles requests"""
...@@ -176,17 +177,39 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin): ...@@ -176,17 +177,39 @@ class DetokenizerManager(MultiHttpWorkerDetokenizerMixin):
) )
surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset]) surr_ids.append(s.decode_ids[s.surr_offset : s.read_offset])
# TODO(lmzheng): handle skip_special_tokens/spaces_between_special_tokens per request # TODO(lmzheng): better handle skip_special_tokens/spaces_between_special_tokens per request
surr_texts = self.tokenizer.batch_decode( if self.disable_tokenizer_batch_decode:
surr_ids, surr_texts = [
skip_special_tokens=recv_obj.skip_special_tokens[0], self.tokenizer.decode(
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0], surr, skip_special_tokens=skip, spaces_between_special_tokens=space
) )
read_texts = self.tokenizer.batch_decode( for surr, skip, space in zip(
read_ids, surr_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0], recv_obj.skip_special_tokens,
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0], recv_obj.spaces_between_special_tokens,
) )
]
read_texts = [
self.tokenizer.decode(
read, skip_special_tokens=skip, spaces_between_special_tokens=space
)
for read, skip, space in zip(
read_ids,
recv_obj.skip_special_tokens,
recv_obj.spaces_between_special_tokens,
)
]
else:
surr_texts = self.tokenizer.batch_decode(
surr_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
read_texts = self.tokenizer.batch_decode(
read_ids,
skip_special_tokens=recv_obj.skip_special_tokens[0],
spaces_between_special_tokens=recv_obj.spaces_between_special_tokens[0],
)
# Incremental decoding # Incremental decoding
output_strs = [] output_strs = []
......
...@@ -433,6 +433,7 @@ class ServerArgs: ...@@ -433,6 +433,7 @@ class ServerArgs:
enable_symm_mem: bool = False enable_symm_mem: bool = False
disable_flashinfer_cutlass_moe_fp4_allgather: bool = False disable_flashinfer_cutlass_moe_fp4_allgather: bool = False
enable_tokenizer_batch_encode: bool = False enable_tokenizer_batch_encode: bool = False
disable_tokenizer_batch_decode: bool = False
disable_outlines_disk_cache: bool = False disable_outlines_disk_cache: bool = False
disable_custom_all_reduce: bool = False disable_custom_all_reduce: bool = False
enable_mscclpp: bool = False enable_mscclpp: bool = False
...@@ -2898,6 +2899,11 @@ class ServerArgs: ...@@ -2898,6 +2899,11 @@ class ServerArgs:
action="store_true", action="store_true",
help="Enable batch tokenization for improved performance when processing multiple text inputs. Do not use with image inputs, pre-tokenized input_ids, or input_embeds.", help="Enable batch tokenization for improved performance when processing multiple text inputs. Do not use with image inputs, pre-tokenized input_ids, or input_embeds.",
) )
parser.add_argument(
"--disable-tokenizer-batch-decode",
action="store_true",
help="Disable batch decoding when decoding multiple completions.",
)
parser.add_argument( parser.add_argument(
"--disable-outlines-disk-cache", "--disable-outlines-disk-cache",
action="store_true", action="store_true",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment