Unverified Commit df97b31f authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Tiny support setting numa nodes for different ranks (#10006)

parent 339f8eef
......@@ -158,6 +158,7 @@ from sglang.srt.utils import (
get_zmq_socket,
is_cpu,
kill_itself_when_parent_died,
numa_bind_to_node,
point_to_point_pyobj,
pyspy_dump_schedulers,
require_mlp_sync,
......@@ -2519,6 +2520,9 @@ def run_scheduler_process(
pipe_writer,
balance_meta: Optional[DPBalanceMeta] = None,
):
if (numa_node := server_args.numa_node) is not None:
numa_bind_to_node(numa_node[gpu_id])
# Generate the prefix
prefix = ""
if dp_rank is not None:
......
......@@ -351,6 +351,7 @@ class ServerArgs:
disable_fast_image_processor: bool = False
enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1
numa_node: Optional[List[int]] = None
# Debug tensor dumps
debug_tensor_dump_output_folder: Optional[str] = None
......@@ -1991,6 +1992,12 @@ class ServerArgs:
default=ServerArgs.scheduler_recv_interval,
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
)
parser.add_argument(
"--numa-node",
type=int,
nargs="+",
help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
)
# Debug tensor dumps
parser.add_argument(
......
......@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output):
raise Exception(f"CUDA error: {err}")
return results
def numa_bind_to_node(node: int):
    """Bind the calling process to a NUMA node via libnuma.

    Calls ``numa_run_on_node`` for ``node`` and then ``numa_set_localalloc``,
    so both the CPU placement and the memory allocation policy of the
    current process are set through libnuma.

    Args:
        node: NUMA node index, passed to libnuma as a C ``int``.

    Raises:
        OSError: If the libnuma shared library cannot be loaded, or if
            ``numa_run_on_node`` reports a failure (errno is attached).
        SystemError: If libnuma reports that NUMA is not available.
    """
    # Function-scope import: a plain `import ctypes` at module level does not
    # guarantee that the `ctypes.util` submodule is loaded.
    from ctypes.util import find_library

    # Resolve the installed (typically versioned) library name first; the
    # unversioned "libnuma.so" name is often only present with development
    # packages. Fall back to the original name if the lookup finds nothing.
    lib_name = find_library("numa") or "libnuma.so"
    # use_errno=True lets us read errno after a failing libnuma call.
    libnuma = ctypes.CDLL(lib_name, use_errno=True)

    if libnuma.numa_available() < 0:
        raise SystemError("numa not available on this system")

    # numa_run_on_node returns a negative value on failure; surface it instead
    # of silently continuing with the process left unbound.
    if libnuma.numa_run_on_node(ctypes.c_int(node)) < 0:
        raise OSError(ctypes.get_errno(), f"numa_run_on_node({node}) failed")

    libnuma.numa_set_localalloc()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment