Commit e55ebec5 (unverified), authored by Keiven C, committed by GitHub
Browse files

chore: rename terminate_existing to terminate_all_matching_process_names (#5923)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 4d9e64a2
...@@ -73,7 +73,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -73,7 +73,7 @@ class DynamoWorkerProcess(ManagedProcess):
], ],
timeout=360, timeout=360,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -278,7 +278,7 @@ class EtcdServer(ManagedProcess): ...@@ -278,7 +278,7 @@ class EtcdServer(ManagedProcess):
command=command, command=command,
timeout=timeout, timeout=timeout,
display_output=False, display_output=False,
terminate_existing=not use_random_port, # Disabled for parallel test execution with random ports terminate_all_matching_process_names=not use_random_port, # For distributed tests, do not terminate all matching processes
health_check_ports=[port], health_check_ports=[port],
data_dir=data_dir, data_dir=data_dir,
log_dir=request.node.name, log_dir=request.node.name,
...@@ -325,7 +325,7 @@ class NatsServer(ManagedProcess): ...@@ -325,7 +325,7 @@ class NatsServer(ManagedProcess):
command=command, command=command,
timeout=timeout, timeout=timeout,
display_output=False, display_output=False,
terminate_existing=not use_random_port, # Disabled for parallel test execution with random ports terminate_all_matching_process_names=not use_random_port, # For distributed tests, do not terminate all matching processes
data_dir=data_dir, data_dir=data_dir,
health_check_ports=[port], health_check_ports=[port],
health_check_funcs=[self._nats_ready], health_check_funcs=[self._nats_ready],
......
...@@ -142,7 +142,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -142,7 +142,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
# Ensure any orphaned SGLang engine cores or child helpers are cleaned up # Ensure any orphaned SGLang engine cores or child helpers are cleaned up
stragglers=[ stragglers=[
"SGLANG:EngineCore", "SGLANG:EngineCore",
......
...@@ -134,7 +134,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -134,7 +134,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -123,7 +123,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -123,7 +123,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
# Ensure any orphaned vLLM engine cores or child helpers are cleaned up # Ensure any orphaned vLLM engine cores or child helpers are cleaned up
stragglers=[ stragglers=[
"VLLM::EngineCore", "VLLM::EngineCore",
......
...@@ -36,7 +36,7 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess): ...@@ -36,7 +36,7 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess):
frontend_port=0, # allocate a free port (xdist-safe) frontend_port=0, # allocate a free port (xdist-safe)
router_mode="round-robin", router_mode="round-robin",
extra_env=extra_env, extra_env=extra_env,
terminate_existing=False, terminate_all_matching_process_names=False,
) )
......
...@@ -118,7 +118,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -118,7 +118,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
# Ensure any orphaned SGLang engine cores or child helpers are cleaned up # Ensure any orphaned SGLang engine cores or child helpers are cleaned up
stragglers=[ stragglers=[
"SGLANG:EngineCore", "SGLANG:EngineCore",
......
...@@ -107,7 +107,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -107,7 +107,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -84,7 +84,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -84,7 +84,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=120, timeout=120,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=[ stragglers=[
"VLLM::EngineCore", "VLLM::EngineCore",
], ],
......
...@@ -31,11 +31,16 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess): ...@@ -31,11 +31,16 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess):
"DYN_LOG": "debug", "DYN_LOG": "debug",
"ETCD_ENDPOINTS": ",".join(etcd_endpoints), "ETCD_ENDPOINTS": ",".join(etcd_endpoints),
} }
# WARNING: terminate_all_matching_process_names=True is NOT pytest-xdist safe!
# DANGER: Kills ALL dynamo-frontend processes system-wide, including other parallel tests.
# For parallel-safe alternative, use terminate_all_matching_process_names=False.
# See tests/kvbm_integration/common.py:llm_server_kvbm for example.
# TODO: Switch to terminate_all_matching_process_names=False with dynamic ports
super().__init__( super().__init__(
request, request,
router_mode="round-robin", router_mode="round-robin",
extra_env=extra_env, extra_env=extra_env,
terminate_existing=True, terminate_all_matching_process_names=True, # TODO: Change to False
) )
...@@ -90,7 +95,7 @@ class EtcdReplicaServer(ManagedProcess): ...@@ -90,7 +95,7 @@ class EtcdReplicaServer(ManagedProcess):
command=command, command=command,
timeout=timeout, timeout=timeout,
display_output=False, display_output=False,
terminate_existing=False, terminate_all_matching_process_names=False,
data_dir=data_dir, data_dir=data_dir,
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -96,7 +96,7 @@ class GMSServerProcess(ManagedProcess): ...@@ -96,7 +96,7 @@ class GMSServerProcess(ManagedProcess):
env={**os.environ, "DYN_LOG": "debug"}, env={**os.environ, "DYN_LOG": "debug"},
timeout=60, timeout=60,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
log_dir=log_dir, log_dir=log_dir,
health_check_funcs=[self._socket_ready], health_check_funcs=[self._socket_ready],
) )
......
...@@ -60,7 +60,7 @@ class SGLangWithGMSProcess(ManagedProcess): ...@@ -60,7 +60,7 @@ class SGLangWithGMSProcess(ManagedProcess):
], ],
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=[], stragglers=[],
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -61,7 +61,7 @@ class VLLMWithGMSProcess(ManagedProcess): ...@@ -61,7 +61,7 @@ class VLLMWithGMSProcess(ManagedProcess):
], ],
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=[], stragglers=[],
log_dir=log_dir, log_dir=log_dir,
) )
......
...@@ -184,7 +184,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -184,7 +184,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=["SGLANG:EngineCore"], stragglers=["SGLANG:EngineCore"],
straggler_commands=["-m dynamo.sglang"], straggler_commands=["-m dynamo.sglang"],
log_dir=log_dir, log_dir=log_dir,
......
...@@ -173,7 +173,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -173,7 +173,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
log_dir=log_dir, log_dir=log_dir,
display_name=worker_id, display_name=worker_id,
) )
......
...@@ -167,7 +167,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -167,7 +167,7 @@ class DynamoWorkerProcess(ManagedProcess):
health_check_urls=health_check_urls, health_check_urls=health_check_urls,
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=["VLLM::EngineCore"], stragglers=["VLLM::EngineCore"],
straggler_commands=["-m dynamo.vllm"], straggler_commands=["-m dynamo.vllm"],
log_dir=log_dir, log_dir=log_dir,
......
...@@ -37,7 +37,7 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess): ...@@ -37,7 +37,7 @@ class DynamoFrontendProcess(BaseDynamoFrontendProcess):
router_mode="round-robin", router_mode="round-robin",
extra_args=extra_args if extra_args else None, extra_args=extra_args if extra_args else None,
extra_env=extra_env, extra_env=extra_env,
terminate_existing=False, terminate_all_matching_process_names=False,
display_name="frontend", display_name="frontend",
) )
......
...@@ -63,7 +63,7 @@ class DynamoWorkerProcess(ManagedProcess): ...@@ -63,7 +63,7 @@ class DynamoWorkerProcess(ManagedProcess):
], ],
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=["VLLM::EngineCore"], stragglers=["VLLM::EngineCore"],
straggler_commands=["-m dynamo.vllm"], straggler_commands=["-m dynamo.vllm"],
log_dir=log_dir, log_dir=log_dir,
......
...@@ -58,7 +58,7 @@ def start_services_with_http( ...@@ -58,7 +58,7 @@ def start_services_with_http(
with DynamoFrontendProcess( with DynamoFrontendProcess(
request, request,
frontend_port=ports.frontend_port, frontend_port=ports.frontend_port,
terminate_existing=False, terminate_all_matching_process_names=False,
): ):
logger.info(f"HTTP Frontend started on port {ports.frontend_port}") logger.info(f"HTTP Frontend started on port {ports.frontend_port}")
yield ports.frontend_port, ports.system_ports[0] yield ports.frontend_port, ports.system_ports[0]
...@@ -188,7 +188,7 @@ def start_services_with_grpc( ...@@ -188,7 +188,7 @@ def start_services_with_grpc(
with DynamoFrontendProcess( with DynamoFrontendProcess(
request, request,
frontend_port=ports.frontend_port, frontend_port=ports.frontend_port,
terminate_existing=False, terminate_all_matching_process_names=False,
extra_args=[ extra_args=[
"--kserve-grpc-server", "--kserve-grpc-server",
"--grpc-metrics-port", "--grpc-metrics-port",
...@@ -261,7 +261,7 @@ class MockerWorkerProcess(ManagedProcess): ...@@ -261,7 +261,7 @@ class MockerWorkerProcess(ManagedProcess):
], ],
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=["VLLM::EngineCore"], stragglers=["VLLM::EngineCore"],
straggler_commands=["-m dynamo.mocker"], straggler_commands=["-m dynamo.mocker"],
log_dir=log_dir, log_dir=log_dir,
......
...@@ -63,7 +63,7 @@ class MockWorkerProcess(ManagedProcess): ...@@ -63,7 +63,7 @@ class MockWorkerProcess(ManagedProcess):
], ],
timeout=300, timeout=300,
display_output=True, display_output=True,
terminate_existing=False, terminate_all_matching_process_names=False,
stragglers=[], stragglers=[],
straggler_commands=["echo_tensor_worker.py"], straggler_commands=["echo_tensor_worker.py"],
log_dir=log_dir, log_dir=log_dir,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment