Unverified Commit 31fccf5a authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: change logs from `INFO` to `DEBUG` for dp and add force quit for tokenizer manager (#7251)

parent b783c1cb
...@@ -175,7 +175,7 @@ class Engine(EngineBase): ...@@ -175,7 +175,7 @@ class Engine(EngineBase):
""" """
if self.server_args.enable_dp_attention: if self.server_args.enable_dp_attention:
if data_parallel_rank is None: if data_parallel_rank is None:
logger.info("data_parallel_rank not provided, using default dispatch") logger.debug("data_parallel_rank not provided, using default dispatch")
elif data_parallel_rank < 0: elif data_parallel_rank < 0:
raise ValueError("data_parallel_rank must be non-negative") raise ValueError("data_parallel_rank must be non-negative")
elif data_parallel_rank >= self.server_args.dp_size: elif data_parallel_rank >= self.server_args.dp_size:
...@@ -258,7 +258,7 @@ class Engine(EngineBase): ...@@ -258,7 +258,7 @@ class Engine(EngineBase):
if self.server_args.enable_dp_attention: if self.server_args.enable_dp_attention:
if data_parallel_rank is None: if data_parallel_rank is None:
logger.info("data_parallel_rank not provided, using default dispatch") logger.debug("data_parallel_rank not provided, using default dispatch")
elif data_parallel_rank < 0: elif data_parallel_rank < 0:
raise ValueError("data_parallel_rank must be non-negative") raise ValueError("data_parallel_rank must be non-negative")
elif data_parallel_rank >= self.server_args.dp_size: elif data_parallel_rank >= self.server_args.dp_size:
......
...@@ -1140,13 +1140,21 @@ class TokenizerManager: ...@@ -1140,13 +1140,21 @@ class TokenizerManager:
remain_num_req = len(self.rid_to_state) remain_num_req = len(self.rid_to_state)
if self.health_check_failed: if self.health_check_failed:
# if health check failed, we should exit immediately # if health check failed, exit immediately
logger.error( logger.error(
"Signal SIGTERM received while health check failed. Exiting... remaining number of requests: %d", "Signal SIGTERM received while health check failed. Exiting... remaining number of requests: %d",
remain_num_req, remain_num_req,
) )
break break
elif get_bool_env_var("SGL_FORCE_SHUTDOWN"):
# if force shutdown flag set, exit immediately
logger.error(
"Signal SIGTERM received while force shutdown flag set. Force exiting... remaining number of requests: %d",
remain_num_req,
)
break
logger.info( logger.info(
f"Gracefully exiting... remaining number of requests {remain_num_req}" f"Gracefully exiting... remaining number of requests {remain_num_req}"
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment