Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
229d2b95
Unverified
Commit
229d2b95
authored
Sep 30, 2025
by
Zaili Wang
Committed by
GitHub
Sep 30, 2025
Browse files
[CPU] Adding Memory Capacity Acquisition Functionality (#11102)
parent
9710f718
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
2 deletions
+34
-2
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-2
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+28
-0
No files found.
python/sglang/srt/server_args.py
View file @
229d2b95
...
...
@@ -639,7 +639,7 @@ class ServerArgs:
if
self
.
cuda_graph_max_bs
>
300
:
reserved_mem
+=
self
.
cuda_graph_max_bs
*
self
.
dp_size
*
1.5
if
gpu_mem
>
60
*
1024
:
if
gpu_mem
is
not
None
and
gpu_mem
>
60
*
1024
:
reserved_mem
=
max
(
reserved_mem
,
10
*
1024
)
if
self
.
speculative_algorithm
is
not
None
:
...
...
@@ -650,7 +650,11 @@ class ServerArgs:
# eagle draft models and cuda graphs
reserved_mem
+=
2
*
1024
self
.
mem_fraction_static
=
round
((
gpu_mem
-
reserved_mem
)
/
gpu_mem
,
3
)
self
.
mem_fraction_static
=
(
round
((
gpu_mem
-
reserved_mem
)
/
gpu_mem
,
3
)
if
gpu_mem
is
not
None
else
0.88
)
# Lazy init to avoid circular import
# Multimodal models need more memory for the image processor
...
...
python/sglang/srt/utils.py
View file @
229d2b95
...
...
@@ -1507,6 +1507,32 @@ def get_npu_memory_capacity():
raise
ImportError
(
"torch_npu is required when run on npu device."
)
def get_cpu_memory_capacity():
    """Return the per-NUMA-node memory capacity in MB for CPU inference.

    Returns the smallest NUMA node's MemTotal (in MB) so every rank is sized
    by the most constrained node. Falls back to total system memory divided
    by the node count when the sysfs meminfo files are unavailable, and to
    total system memory when the NUMA layout cannot be determined at all.

    Returns:
        float: memory capacity in MB, or None when per-rank capacity cannot
        be determined (custom core binding via SGLANG_CPU_OMP_THREADS_BIND).
    """
    # Per-rank memory capacity cannot be determined for customized core settings
    if os.environ.get("SGLANG_CPU_OMP_THREADS_BIND", ""):
        return None
    n_numa_node: int = len(get_cpu_ids_by_node())
    if n_numa_node == 0:
        # Cannot determine NUMA config, fallback to total memory and avoid ZeroDivisionError.
        return float(psutil.virtual_memory().total // (1 << 20))
    try:
        numa_mem_list = []
        file_prefix = "/sys/devices/system/node/"
        for numa_id in range(n_numa_node):
            file_meminfo = f"node{numa_id}/meminfo"
            with open(os.path.join(file_prefix, file_meminfo), "r") as f:
                # 1st line contains 'MemTotal'; only that line is needed.
                # Format: "Node <id> MemTotal: <value> kB" -> value at index 3.
                line = f.readline()
            numa_mem_list.append(int(line.split()[3]))
        # Retrieved value in KB, need MB
        numa_mem = float(min(numa_mem_list) // 1024)
        return numa_mem
    except FileNotFoundError:
        # sysfs meminfo is missing; approximate by splitting total memory
        # evenly across the detected NUMA nodes.
        numa_mem = psutil.virtual_memory().total / n_numa_node
        # Retrieved value in Byte, need MB
        return float(numa_mem // (1 << 20))
def
get_device_memory_capacity
(
device
:
str
=
None
):
if
is_cuda
():
gpu_mem
=
get_nvgpu_memory_capacity
()
...
...
@@ -1516,6 +1542,8 @@ def get_device_memory_capacity(device: str = None):
gpu_mem
=
get_hpu_memory_capacity
()
elif
device
==
"npu"
:
gpu_mem
=
get_npu_memory_capacity
()
elif
device
==
"cpu"
:
gpu_mem
=
get_cpu_memory_capacity
()
else
:
# GPU memory is not known yet or no GPU is available.
gpu_mem
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment