Unverified commit df97b31f authored by fzyzcjy, committed by GitHub
Browse files

Tiny support setting numa nodes for different ranks (#10006)

parent 339f8eef
...@@ -158,6 +158,7 @@ from sglang.srt.utils import ( ...@@ -158,6 +158,7 @@ from sglang.srt.utils import (
get_zmq_socket, get_zmq_socket,
is_cpu, is_cpu,
kill_itself_when_parent_died, kill_itself_when_parent_died,
numa_bind_to_node,
point_to_point_pyobj, point_to_point_pyobj,
pyspy_dump_schedulers, pyspy_dump_schedulers,
require_mlp_sync, require_mlp_sync,
...@@ -2519,6 +2520,9 @@ def run_scheduler_process( ...@@ -2519,6 +2520,9 @@ def run_scheduler_process(
pipe_writer, pipe_writer,
balance_meta: Optional[DPBalanceMeta] = None, balance_meta: Optional[DPBalanceMeta] = None,
): ):
if (numa_node := server_args.numa_node) is not None:
numa_bind_to_node(numa_node[gpu_id])
# Generate the prefix # Generate the prefix
prefix = "" prefix = ""
if dp_rank is not None: if dp_rank is not None:
......
...@@ -351,6 +351,7 @@ class ServerArgs: ...@@ -351,6 +351,7 @@ class ServerArgs:
disable_fast_image_processor: bool = False disable_fast_image_processor: bool = False
enable_return_hidden_states: bool = False enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1 scheduler_recv_interval: int = 1
numa_node: Optional[List[int]] = None
# Debug tensor dumps # Debug tensor dumps
debug_tensor_dump_output_folder: Optional[str] = None debug_tensor_dump_output_folder: Optional[str] = None
...@@ -1991,6 +1992,12 @@ class ServerArgs: ...@@ -1991,6 +1992,12 @@ class ServerArgs:
default=ServerArgs.scheduler_recv_interval, default=ServerArgs.scheduler_recv_interval,
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.", help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
) )
parser.add_argument(
"--numa-node",
type=int,
nargs="+",
help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
)
# Debug tensor dumps # Debug tensor dumps
parser.add_argument( parser.add_argument(
......
...@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output): ...@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output):
raise Exception(f"CUDA error: {err}") raise Exception(f"CUDA error: {err}")
return results return results
def numa_bind_to_node(node: int):
    """Bind the current process to a single NUMA node.

    Restricts CPU execution to *node* via ``numa_run_on_node`` and makes
    subsequent memory allocations prefer the local (now bound) node via
    ``numa_set_localalloc``.

    Args:
        node: Zero-based NUMA node index to bind to.

    Raises:
        OSError: If ``libnuma.so`` cannot be loaded.
        SystemError: If NUMA is not available on this system.
        RuntimeError: If binding to ``node`` fails (e.g. invalid index).
    """
    libnuma = ctypes.CDLL("libnuma.so")
    if libnuma.numa_available() < 0:
        raise SystemError("numa not available on this system")
    # numa_run_on_node returns 0 on success and -1 on error; the original
    # code ignored this, silently leaving the process unbound on a bad
    # node index (user-supplied via --numa-node).
    if libnuma.numa_run_on_node(ctypes.c_int(node)) != 0:
        raise RuntimeError(f"numa_run_on_node({node}) failed")
    libnuma.numa_set_localalloc()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment