Unverified Commit 3d13ca0e, authored by Nick Hill, committed by GitHub
Browse files

[BugFix] Fix `--disable-log-stats` in V1 server mode (#17600)


Signed-off-by: Nick Hill <nhill@redhat.com>
parent 66ab3b13
...@@ -120,6 +120,7 @@ class AsyncLLM(EngineClient): ...@@ -120,6 +120,7 @@ class AsyncLLM(EngineClient):
executor_class=executor_class, executor_class=executor_class,
log_stats=self.log_stats, log_stats=self.log_stats,
) )
if self.stat_loggers:
for stat_logger in self.stat_loggers[0]: for stat_logger in self.stat_loggers[0]:
stat_logger.log_engine_initialized() stat_logger.log_engine_initialized()
self.output_handler: Optional[asyncio.Task] = None self.output_handler: Optional[asyncio.Task] = None
......
...@@ -442,9 +442,10 @@ class MPClient(EngineCoreClient): ...@@ -442,9 +442,10 @@ class MPClient(EngineCoreClient):
logger.info("Core engine process %d ready.", eng_id) logger.info("Core engine process %d ready.", eng_id)
identities.discard(eng_id) identities.discard(eng_id)
# Setup KV cache config with initialization state from # Setup KV cache config with initialization state from
# engine core process. # engine core process. Sum values from all engines in DP case.
self.vllm_config.cache_config.num_gpu_blocks = message_dict[ num_gpu_blocks = self.vllm_config.cache_config.num_gpu_blocks or 0
'num_gpu_blocks'] num_gpu_blocks += message_dict['num_gpu_blocks']
self.vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
def _init_core_engines( def _init_core_engines(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment