Unverified commit 96784a65, authored by Caproni, committed by GitHub
Browse files

[Fix] Orphan process in data parallel (#7995)


Signed-off-by: Capronir <839972205@qq.com>
parent df5407fb
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# ============================================================================== # ==============================================================================
"""A controller that dispatches requests to multiple data parallel workers.""" """A controller that dispatches requests to multiple data parallel workers."""
import faulthandler
import logging import logging
import multiprocessing as mp import multiprocessing as mp
import signal import signal
...@@ -39,7 +40,12 @@ from sglang.srt.managers.scheduler import run_scheduler_process ...@@ -39,7 +40,12 @@ from sglang.srt.managers.scheduler import run_scheduler_process
from sglang.srt.managers.utils import DPBalanceMeta from sglang.srt.managers.utils import DPBalanceMeta
from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
from sglang.srt.utils import bind_port, configure_logger, get_zmq_socket from sglang.srt.utils import (
bind_port,
configure_logger,
get_zmq_socket,
kill_itself_when_parent_died,
)
from sglang.utils import get_exception_traceback from sglang.utils import get_exception_traceback
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -343,7 +349,9 @@ def run_data_parallel_controller_process( ...@@ -343,7 +349,9 @@ def run_data_parallel_controller_process(
port_args: PortArgs, port_args: PortArgs,
pipe_writer, pipe_writer,
): ):
kill_itself_when_parent_died()
setproctitle.setproctitle("sglang::data_parallel_controller") setproctitle.setproctitle("sglang::data_parallel_controller")
faulthandler.enable()
configure_logger(server_args) configure_logger(server_args)
parent_process = psutil.Process().parent() parent_process = psutil.Process().parent()
balance_meta = DPBalanceMeta(server_args.dp_size) balance_meta = DPBalanceMeta(server_args.dp_size)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment