Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
229d2b95
Unverified
Commit
229d2b95
authored
Sep 30, 2025
by
Zaili Wang
Committed by
GitHub
Sep 30, 2025
Browse files
[CPU] Adding Memory Capacity Acquisition Functionality (#11102)
parent
9710f718
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
2 deletions
+34
-2
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-2
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+28
-0
No files found.
python/sglang/srt/server_args.py
View file @
229d2b95
...
...
@@ -639,7 +639,7 @@ class ServerArgs:
if
self
.
cuda_graph_max_bs
>
300
:
reserved_mem
+=
self
.
cuda_graph_max_bs
*
self
.
dp_size
*
1.5
if
gpu_mem
>
60
*
1024
:
if
gpu_mem
is
not
None
and
gpu_mem
>
60
*
1024
:
reserved_mem
=
max
(
reserved_mem
,
10
*
1024
)
if
self
.
speculative_algorithm
is
not
None
:
...
...
@@ -650,7 +650,11 @@ class ServerArgs:
# eagle draft models and cuda graphs
reserved_mem
+=
2
*
1024
self
.
mem_fraction_static
=
round
((
gpu_mem
-
reserved_mem
)
/
gpu_mem
,
3
)
self
.
mem_fraction_static
=
(
round
((
gpu_mem
-
reserved_mem
)
/
gpu_mem
,
3
)
if
gpu_mem
is
not
None
else
0.88
)
# Lazy init to avoid circular import
# Multimodal models need more memory for the image processor
...
...
python/sglang/srt/utils.py
View file @
229d2b95
...
...
@@ -1507,6 +1507,32 @@ def get_npu_memory_capacity():
raise
ImportError
(
"torch_npu is required when run on npu device."
)
def get_cpu_memory_capacity():
    """Return the per-NUMA-node memory capacity in MB for CPU inference.

    Returns the smallest NUMA node's MemTotal (in MB) so every rank is sized
    by the most constrained node. Falls back to total system memory divided
    by the node count when the sysfs meminfo files are unavailable, and to
    total system memory when the NUMA layout cannot be determined at all.

    Returns:
        float: memory capacity in MB, or None when per-rank capacity cannot
        be determined (custom core binding via SGLANG_CPU_OMP_THREADS_BIND).
    """
    # Per-rank memory capacity cannot be determined for customized core settings
    if os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", ""):
        return None
    n_numa_node: int = len(get_cpu_ids_by_node())
    if n_numa_node == 0:
        # Cannot determine NUMA config, fallback to total memory and avoid ZeroDivisionError.
        return float(psutil.virtual_memory().total // (1 << 20))
    try:
        numa_mem_list = []
        file_prefix = "/sys/devices/system/node/"
        for numa_id in range(n_numa_node):
            file_meminfo = f"node{numa_id}/meminfo"
            with open(os.path.join(file_prefix, file_meminfo), "r") as f:
                # 1st line contains 'MemTotal'; only that line is needed.
                # Format: "Node <id> MemTotal: <value> kB" -> value at index 3.
                line = f.readline()
            numa_mem_list.append(int(line.split()[3]))
        # Retrieved value in KB, need MB
        numa_mem = float(min(numa_mem_list) // 1024)
        return numa_mem
    except FileNotFoundError:
        # sysfs meminfo is missing; approximate by splitting total memory
        # evenly across the detected NUMA nodes.
        numa_mem = psutil.virtual_memory().total / n_numa_node
        # Retrieved value in Byte, need MB
        return float(numa_mem // (1 << 20))
def
get_device_memory_capacity
(
device
:
str
=
None
):
if
is_cuda
():
gpu_mem
=
get_nvgpu_memory_capacity
()
...
...
@@ -1516,6 +1542,8 @@ def get_device_memory_capacity(device: str = None):
gpu_mem
=
get_hpu_memory_capacity
()
elif
device
==
"npu"
:
gpu_mem
=
get_npu_memory_capacity
()
elif
device
==
"cpu"
:
gpu_mem
=
get_cpu_memory_capacity
()
else
:
# GPU memory is not known yet or no GPU is available.
gpu_mem
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment