Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2ffe0a73
Unverified
Commit
2ffe0a73
authored
Nov 15, 2024
by
HAI
Committed by
GitHub
Nov 15, 2024
Browse files
Add get_amdgpu_memory_capacity() (#2049)
parent
cf248976
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
3 deletions
+40
-3
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+7
-2
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+33
-1
No files found.
python/sglang/srt/server_args.py
View file @
2ffe0a73
...
@@ -23,8 +23,10 @@ import tempfile
...
@@ -23,8 +23,10 @@ import tempfile
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
from
sglang.srt.utils
import
(
from
sglang.srt.utils
import
(
get_gpu_memory_capacity
,
get_amdgpu_memory_capacity
,
get_nvgpu_memory_capacity
,
is_flashinfer_available
,
is_flashinfer_available
,
is_hip
,
is_ipv6
,
is_ipv6
,
is_port_available
,
is_port_available
,
)
)
...
@@ -165,7 +167,10 @@ class ServerArgs:
...
@@ -165,7 +167,10 @@ class ServerArgs:
self
.
mem_fraction_static
=
0.88
self
.
mem_fraction_static
=
0.88
# Adjust for GPUs with small memory capacities
# Adjust for GPUs with small memory capacities
gpu_mem
=
get_gpu_memory_capacity
()
if
is_hip
():
gpu_mem
=
get_amdgpu_memory_capacity
()
else
:
gpu_mem
=
get_nvgpu_memory_capacity
()
if
gpu_mem
<
25000
:
if
gpu_mem
<
25000
:
logger
.
warning
(
logger
.
warning
(
"Automatically adjust --chunked-prefill-size for small GPUs."
"Automatically adjust --chunked-prefill-size for small GPUs."
...
...
python/sglang/srt/utils.py
View file @
2ffe0a73
...
@@ -794,7 +794,39 @@ def add_prometheus_middleware(app):
...
@@ -794,7 +794,39 @@ def add_prometheus_middleware(app):
app
.
routes
.
append
(
metrics_route
)
app
.
routes
.
append
(
metrics_route
)
def get_amdgpu_memory_capacity():
    """Return the smallest VRAM capacity, in MiB, among visible AMD GPUs.

    Runs ``rocm-smi --showmeminfo vram`` and parses the "Total Memory" rows
    (values reported in bytes), converting each to MiB.  The minimum across
    devices is returned so downstream memory planning is safe on nodes with
    heterogeneous GPUs.

    Returns:
        float: minimum VRAM capacity in MiB across detected devices.

    Raises:
        RuntimeError: if ``rocm-smi`` is not installed, or exits non-zero.
        ValueError: if the command succeeds but no memory values are found.
    """
    try:
        # Invoke rocm-smi directly instead of a `rocm-smi | grep | awk`
        # pipeline under shell=True.  With shell=True the shell itself was
        # always found, so a missing rocm-smi binary never raised
        # FileNotFoundError and the except clause below was dead code;
        # shell=False restores that error path (and avoids the shell).
        result = subprocess.run(
            ["rocm-smi", "--showmeminfo", "vram"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            raise RuntimeError(f"rocm-smi error: {result.stderr.strip()}")

        # Replicate `grep 'Total Memory' | awk '{print $NF}'` in Python:
        # keep the last whitespace-separated field of each "Total Memory"
        # row, accept only purely numeric values, and convert bytes -> MiB.
        memory_values = []
        for line in result.stdout.splitlines():
            if "Total Memory" not in line:
                continue
            last_field = line.split()[-1].strip()
            if re.match(r"^\d+(\.\d+)?$", last_field):
                memory_values.append(float(last_field) / 1024 / 1024)

        if not memory_values:
            raise ValueError("No GPU memory values found.")

        # Use the most constrained device for capacity planning.
        return min(memory_values)
    except FileNotFoundError:
        raise RuntimeError(
            "rocm-smi not found. Ensure AMD ROCm drivers are installed and accessible."
        )
def
get_nvgpu_memory_capacity
():
try
:
try
:
# Run nvidia-smi and capture the output
# Run nvidia-smi and capture the output
result
=
subprocess
.
run
(
result
=
subprocess
.
run
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment