[Performance]: Process affinity to CPU cores with multiple sockets support (#2171)

10189d08 · HAI · GitHub · c4336b2b · 10189d08 · 10189d08
Unverified Commit 10189d08 authored Nov 25, 2024 by HAI Committed by GitHub Nov 25, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 39 additions and 0 deletions

python/sglang/srt/managers/scheduler.py python/sglang/srt/managers/scheduler.py +4 -0

python/sglang/srt/utils.py python/sglang/srt/utils.py +35 -0

No files found.
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -72,6 +72,7 @@ from sglang.srt.utils import (
    configure_logger,
    crash_on_warnings,
    get_zmq_socket,
+    gpu_proc_affinity,
    kill_parent_process,
    set_random_seed,
    suppress_other_loggers,
@@ -1393,6 +1394,9 @@ def run_scheduler_process(
    dp_rank: Optional[int],
    pipe_writer,
 ):
+    # set cpu affinity to this gpu process
+    gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id)
    # [For Router] if env var "DP_RANK" exist, set dp_rank to the value of the env var
    if dp_rank is None and "DP_RANK" in os.environ:
        dp_rank = int(os.environ["DP_RANK"])

--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -15,6 +15,7 @@
 import base64
 import ipaddress
+import itertools
 import json
 import logging
 import os
@@ -987,3 +988,37 @@ def direct_register_custom_op(
    my_lib.impl(op_name, op_func, "CUDA")
    if fake_impl is not None:
        my_lib._register_fake(op_name, fake_impl)
+def gpu_proc_affinity(
+    tp_size: int,
+    nnodes: int,
+    gpu_id: int,
+):
+    # current process
+    pid = os.getpid()
+    p = psutil.Process(pid)
+    tp_size_per_node = tp_size // nnodes
+    # total physical cores
+    total_pcores = psutil.cpu_count(logical=False)
+    # physical cores per TP (N.B. more Cores than GPUs on node)
+    num_cores_bind = total_pcores // tp_size_per_node
+    # able to handle multiple DP per node
+    start_cpu_id = (gpu_id * num_cores_bind) % total_pcores
+    end_cpu_id = start_cpu_id + num_cores_bind
+    if psutil.cpu_count() != psutil.cpu_count(logical=False):
+        # HT on
+        upper_cpu_ids = [id for id in range(start_cpu_id, end_cpu_id)]
+        lower_cpu_ids = [id + total_pcores for id in range(start_cpu_id, end_cpu_id)]
+        bind_cpu_ids = list(itertools.chain(upper_cpu_ids, lower_cpu_ids))
+    else:
+        # HT off
+        bind_cpu_ids = [id for id in range(start_cpu_id, end_cpu_id)]
+    # set cpu_affinity to current process
+    p.cpu_affinity(bind_cpu_ids)
+    logger.info(f"Process {pid} gpu_id {gpu_id} is running on CPUs: {p.cpu_affinity()}")