Unverified commit 0ce091a8, authored by Lianmin Zheng and committed by GitHub
Browse files

[Minor] Improve code style (#2419)

parent 835f8afc
...@@ -29,7 +29,6 @@ from sglang.srt.managers.io_struct import ( ...@@ -29,7 +29,6 @@ from sglang.srt.managers.io_struct import (
BatchStrOut, BatchStrOut,
BatchTokenIDOut, BatchTokenIDOut,
) )
from sglang.srt.managers.schedule_batch import FINISH_MATCHED_STR, FINISH_MATCHED_TOKEN
from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import configure_logger, get_zmq_socket from sglang.srt.utils import configure_logger, get_zmq_socket
from sglang.utils import find_printable_text, get_exception_traceback from sglang.utils import find_printable_text, get_exception_traceback
......
...@@ -1198,6 +1198,7 @@ class Scheduler: ...@@ -1198,6 +1198,7 @@ class Scheduler:
decode_ids_list = [] decode_ids_list = []
read_offsets = [] read_offsets = []
output_ids = [] output_ids = []
skip_special_tokens = [] skip_special_tokens = []
spaces_between_special_tokens = [] spaces_between_special_tokens = []
no_stop_trim = [] no_stop_trim = []
......
...@@ -623,23 +623,23 @@ class TokenizerManager: ...@@ -623,23 +623,23 @@ class TokenizerManager:
i, i,
) )
if not isinstance(recv_obj, BatchEmbeddingOut):
meta_info.update(
{
"completion_tokens": recv_obj.completion_tokens[i],
"cached_tokens": recv_obj.cached_tokens[i],
}
)
if isinstance(recv_obj, BatchStrOut): if isinstance(recv_obj, BatchStrOut):
out_dict = { out_dict = {
"text": recv_obj.output_strs[i], "text": recv_obj.output_strs[i],
"meta_info": { "meta_info": meta_info,
**meta_info,
"completion_tokens": recv_obj.completion_tokens[i],
"cached_tokens": recv_obj.cached_tokens[i],
},
} }
elif isinstance(recv_obj, BatchTokenIDOut): elif isinstance(recv_obj, BatchTokenIDOut):
out_dict = { out_dict = {
"token_ids": recv_obj.output_ids[i], "token_ids": recv_obj.output_ids[i],
"meta_info": { "meta_info": meta_info,
**meta_info,
"completion_tokens": recv_obj.completion_tokens[i],
"cached_tokens": recv_obj.cached_tokens[i],
},
} }
else: else:
assert isinstance(recv_obj, BatchEmbeddingOut) assert isinstance(recv_obj, BatchEmbeddingOut)
......
...@@ -114,7 +114,7 @@ class ModelRunner: ...@@ -114,7 +114,7 @@ class ModelRunner:
server_args.chunked_prefill_size = -1 server_args.chunked_prefill_size = -1
self.mem_fraction_static *= 0.95 self.mem_fraction_static *= 0.95
logger.info( logger.info(
f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static} " f"Automatically reduce --mem-fraction-static to {self.mem_fraction_static:.3f} "
f"and turn off chunked prefill " f"and turn off chunked prefill "
f"because this is a multimodal model." f"because this is a multimodal model."
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment