Unverified Commit 229d2b95 authored by Zaili Wang, committed by GitHub

[CPU] Adding Memory Capacity Acquisition Functionality (#11102)

parent 9710f718
@@ -639,7 +639,7 @@ class ServerArgs:
         if self.cuda_graph_max_bs > 300:
             reserved_mem += self.cuda_graph_max_bs * self.dp_size * 1.5
-        if gpu_mem > 60 * 1024:
+        if gpu_mem is not None and gpu_mem > 60 * 1024:
             reserved_mem = max(reserved_mem, 10 * 1024)
         if self.speculative_algorithm is not None:
@@ -650,7 +650,11 @@ class ServerArgs:
             # eagle draft models and cuda graphs
             reserved_mem += 2 * 1024
-        self.mem_fraction_static = round((gpu_mem - reserved_mem) / gpu_mem, 3)
+        self.mem_fraction_static = (
+            round((gpu_mem - reserved_mem) / gpu_mem, 3)
+            if gpu_mem is not None
+            else 0.88
+        )
         # Lazy init to avoid circular import
         # Multimodal models need more memory for the image processor
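To make the new fallback concrete, here is a small illustrative sketch of the arithmetic in the second hunk; the numbers are hypothetical and not taken from the commit:

    gpu_mem = 81_559        # MB; e.g. an ~80 GB accelerator (hypothetical value)
    reserved_mem = 10_240   # MB (hypothetical value)
    print(round((gpu_mem - reserved_mem) / gpu_mem, 3))  # 0.874
    # When gpu_mem is None (e.g. CPU capacity could not be determined),
    # mem_fraction_static now falls back to the default 0.88.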
@@ -1507,6 +1507,32 @@ def get_npu_memory_capacity():
         raise ImportError("torch_npu is required when run on npu device.")
 
 
+def get_cpu_memory_capacity():
+    # Per-rank memory capacity cannot be determined for customized core settings
+    if os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", ""):
+        return None
+    n_numa_node: int = len(get_cpu_ids_by_node())
+    if n_numa_node == 0:
+        # Cannot determine the NUMA config; fall back to total memory and avoid ZeroDivisionError.
+        return float(psutil.virtual_memory().total // (1 << 20))
+    try:
+        numa_mem_list = list()
+        file_prefix = "/sys/devices/system/node/"
+        for numa_id in range(n_numa_node):
+            file_meminfo = f"node{numa_id}/meminfo"
+            with open(os.path.join(file_prefix, file_meminfo), "r") as f:
+                # The first line contains 'MemTotal'
+                line = f.read().split("\n")[0]
+                numa_mem_list.append(int(line.split()[3]))
+        # The retrieved values are in kB; convert to MB
+        numa_mem = float(min(numa_mem_list) // 1024)
+        return numa_mem
+    except FileNotFoundError:
+        numa_mem = psutil.virtual_memory().total / n_numa_node
+        # The retrieved value is in bytes; convert to MB
+        return float(numa_mem // (1 << 20))
+
+
 def get_device_memory_capacity(device: str = None):
     if is_cuda():
         gpu_mem = get_nvgpu_memory_capacity()
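As an aside on the parsing above: the first line of a node's meminfo file has a fixed field layout, which is why the function takes index 3 after split(). A minimal illustration with a made-up value:

    # Typical first line of /sys/devices/system/node/node0/meminfo (value is hypothetical):
    sample = "Node 0 MemTotal:       263584204 kB"
    fields = sample.split()   # ['Node', '0', 'MemTotal:', '263584204', 'kB']
    mem_kb = int(fields[3])   # MemTotal is reported in kB
    mem_mb = mem_kb // 1024   # get_cpu_memory_capacity() returns MB
    print(mem_mb)             # 257406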
@@ -1516,6 +1542,8 @@ def get_device_memory_capacity(device: str = None):
         gpu_mem = get_hpu_memory_capacity()
     elif device == "npu":
         gpu_mem = get_npu_memory_capacity()
+    elif device == "cpu":
+        gpu_mem = get_cpu_memory_capacity()
     else:
         # GPU memory is not known yet or no GPU is available.
         gpu_mem = None
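For context, a hedged sketch of how the new branch would be exercised on a CPU-only host; this usage is an assumption for illustration, not code from the commit:

    # get_device_memory_capacity("cpu") now delegates to get_cpu_memory_capacity(),
    # which returns capacity in MB, or None when SGLANG_CPU_OMP_THREADS_BIND is set.
    mem_mb = get_device_memory_capacity(device="cpu")
    if mem_mb is None:
        # ServerArgs then falls back to mem_fraction_static = 0.88 (see the first hunk).
        print("per-rank CPU memory capacity unknown")
    else:
        print(f"usable CPU memory per NUMA node: {mem_mb:.0f} MB")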