Unverified Commit d6545ad2 authored by leiwen83, committed by GitHub
Browse files

add option to shorten prompt print in log (#991)


Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
parent 90eb3f43
...@@ -171,6 +171,7 @@ class AsyncEngineArgs(EngineArgs): ...@@ -171,6 +171,7 @@ class AsyncEngineArgs(EngineArgs):
"""Arguments for asynchronous vLLM engine.""" """Arguments for asynchronous vLLM engine."""
engine_use_ray: bool = False engine_use_ray: bool = False
disable_log_requests: bool = False disable_log_requests: bool = False
max_log_len: Optional[int] = None
@staticmethod @staticmethod
def add_cli_args( def add_cli_args(
...@@ -183,4 +184,10 @@ class AsyncEngineArgs(EngineArgs): ...@@ -183,4 +184,10 @@ class AsyncEngineArgs(EngineArgs):
parser.add_argument('--disable-log-requests', parser.add_argument('--disable-log-requests',
action='store_true', action='store_true',
help='disable logging requests') help='disable logging requests')
parser.add_argument('--max-log-len',
type=int,
default=None,
help='max number of prompt characters or prompt '
'ID numbers being printed in log. '
'Default: unlimited.')
return parser return parser
...@@ -242,11 +242,13 @@ class AsyncLLMEngine: ...@@ -242,11 +242,13 @@ class AsyncLLMEngine:
engine_use_ray: bool, engine_use_ray: bool,
*args, *args,
log_requests: bool = True, log_requests: bool = True,
max_log_len: Optional[int] = None,
start_engine_loop: bool = True, start_engine_loop: bool = True,
**kwargs) -> None: **kwargs) -> None:
self.worker_use_ray = worker_use_ray self.worker_use_ray = worker_use_ray
self.engine_use_ray = engine_use_ray self.engine_use_ray = engine_use_ray
self.log_requests = log_requests self.log_requests = log_requests
self.max_log_len = max_log_len
self.engine = self._init_engine(*args, **kwargs) self.engine = self._init_engine(*args, **kwargs)
self.request_tracker: RequestTracker = RequestTracker() self.request_tracker: RequestTracker = RequestTracker()
...@@ -325,10 +327,18 @@ class AsyncLLMEngine: ...@@ -325,10 +327,18 @@ class AsyncLLMEngine:
arrival_time: Optional[float] = None, arrival_time: Optional[float] = None,
) -> AsyncStream: ) -> AsyncStream:
if self.log_requests: if self.log_requests:
shortened_prompt = prompt
shortened_token_ids = prompt_token_ids
if self.max_log_len is not None:
if shortened_prompt is not None:
shortened_prompt = shortened_prompt[:self.max_log_len]
if shortened_token_ids is not None:
shortened_token_ids = shortened_token_ids[:self.
max_log_len]
logger.info(f"Received request {request_id}: " logger.info(f"Received request {request_id}: "
f"prompt: {prompt!r}, " f"prompt: {shortened_prompt!r}, "
f"sampling params: {sampling_params}, " f"sampling params: {sampling_params}, "
f"prompt token ids: {prompt_token_ids}.") f"prompt token ids: {shortened_token_ids}.")
if not self.is_running: if not self.is_running:
if self.start_engine_loop: if self.start_engine_loop:
...@@ -446,5 +456,6 @@ class AsyncLLMEngine: ...@@ -446,5 +456,6 @@ class AsyncLLMEngine:
placement_group, placement_group,
log_requests=not engine_args.disable_log_requests, log_requests=not engine_args.disable_log_requests,
log_stats=not engine_args.disable_log_stats, log_stats=not engine_args.disable_log_stats,
max_log_len=engine_args.max_log_len,
start_engine_loop=start_engine_loop) start_engine_loop=start_engine_loop)
return engine return engine
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment