"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "e71b8e210db4b98ffa4398d25f8bdf280686ad78"
Unverified Commit 44bc46da authored by Cyrus Leung, committed by GitHub
Browse files

[Bugfix] Actually disable processing cache when API server is scaled out (#21839)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent b7b23da4
...@@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace): ...@@ -140,11 +140,16 @@ def run_multi_api_server(args: argparse.Namespace):
num_api_servers = args.api_server_count num_api_servers = args.api_server_count
assert num_api_servers > 0 assert num_api_servers > 0
orig_disable_mm_preprocessor_cache = args.disable_mm_preprocessor_cache
# set_process_title("ProcManager") # set_process_title("ProcManager")
if num_api_servers > 1: if num_api_servers > 1:
setup_multiprocess_prometheus() setup_multiprocess_prometheus()
# Not compatible with API server scale-out
args.disable_mm_preprocessor_cache = True
listen_address, sock = setup_server(args) listen_address, sock = setup_server(args)
engine_args = vllm.AsyncEngineArgs.from_cli_args(args) engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
...@@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace): ...@@ -161,11 +166,9 @@ def run_multi_api_server(args: argparse.Namespace):
"with api_server_count > 1") "with api_server_count > 1")
if model_config.is_multimodal_model and not ( if model_config.is_multimodal_model and not (
model_config.disable_mm_preprocessor_cache): orig_disable_mm_preprocessor_cache):
logger.warning( logger.warning("Multi-model preprocessor cache will be disabled "
"Multi-model preprocessor cache will be disabled for" "for api_server_count > 1")
" api_server_count > 1")
model_config.disable_mm_preprocessor_cache = True
executor_class = Executor.get_class(vllm_config) executor_class = Executor.get_class(vllm_config)
log_stats = not engine_args.disable_log_stats log_stats = not engine_args.disable_log_stats
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment