Unverified commit df97b31f authored by fzyzcjy, committed by GitHub
Browse files

Tiny support setting numa nodes for different ranks (#10006)

parent 339f8eef
...@@ -158,6 +158,7 @@ from sglang.srt.utils import ( ...@@ -158,6 +158,7 @@ from sglang.srt.utils import (
get_zmq_socket, get_zmq_socket,
is_cpu, is_cpu,
kill_itself_when_parent_died, kill_itself_when_parent_died,
numa_bind_to_node,
point_to_point_pyobj, point_to_point_pyobj,
pyspy_dump_schedulers, pyspy_dump_schedulers,
require_mlp_sync, require_mlp_sync,
...@@ -2519,6 +2520,9 @@ def run_scheduler_process( ...@@ -2519,6 +2520,9 @@ def run_scheduler_process(
pipe_writer, pipe_writer,
balance_meta: Optional[DPBalanceMeta] = None, balance_meta: Optional[DPBalanceMeta] = None,
): ):
if (numa_node := server_args.numa_node) is not None:
numa_bind_to_node(numa_node[gpu_id])
# Generate the prefix # Generate the prefix
prefix = "" prefix = ""
if dp_rank is not None: if dp_rank is not None:
......
...@@ -351,6 +351,7 @@ class ServerArgs: ...@@ -351,6 +351,7 @@ class ServerArgs:
disable_fast_image_processor: bool = False disable_fast_image_processor: bool = False
enable_return_hidden_states: bool = False enable_return_hidden_states: bool = False
scheduler_recv_interval: int = 1 scheduler_recv_interval: int = 1
numa_node: Optional[List[int]] = None
# Debug tensor dumps # Debug tensor dumps
debug_tensor_dump_output_folder: Optional[str] = None debug_tensor_dump_output_folder: Optional[str] = None
...@@ -1991,6 +1992,12 @@ class ServerArgs: ...@@ -1991,6 +1992,12 @@ class ServerArgs:
default=ServerArgs.scheduler_recv_interval, default=ServerArgs.scheduler_recv_interval,
help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.", help="The interval to poll requests in scheduler. Can be set to >1 to reduce the overhead of this.",
) )
parser.add_argument(
"--numa-node",
type=int,
nargs="+",
help="Sets the numa node for the subprocesses. i-th element corresponds to i-th subprocess.",
)
# Debug tensor dumps # Debug tensor dumps
parser.add_argument( parser.add_argument(
......
...@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output): ...@@ -3027,3 +3027,12 @@ def check_cuda_result(raw_output):
raise Exception(f"CUDA error: {err}") raise Exception(f"CUDA error: {err}")
return results return results
def numa_bind_to_node(node: int):
    """Bind the current process to a single NUMA node.

    Restricts CPU execution to *node* via ``numa_run_on_node`` and makes
    subsequent memory allocations prefer the local (now bound) node via
    ``numa_set_localalloc``.

    Args:
        node: Zero-based NUMA node index to bind to.

    Raises:
        OSError: If ``libnuma.so`` cannot be loaded.
        SystemError: If NUMA is not available on this system.
        RuntimeError: If binding to ``node`` fails (e.g. invalid index).
    """
    libnuma = ctypes.CDLL("libnuma.so")
    if libnuma.numa_available() < 0:
        raise SystemError("numa not available on this system")
    # numa_run_on_node returns 0 on success and -1 on error; the original
    # code ignored this, silently leaving the process unbound on a bad
    # node index (user-supplied via --numa-node).
    if libnuma.numa_run_on_node(ctypes.c_int(node)) != 0:
        raise RuntimeError(f"numa_run_on_node({node}) failed")
    libnuma.numa_set_localalloc()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment