Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7a5e6ce1
Unverified
Commit
7a5e6ce1
authored
May 25, 2025
by
kk
Committed by
GitHub
May 24, 2025
Browse files
Fix GPU OOM (#6564)
Co-authored-by:
michael
<
michael.zhang@amd.com
>
parent
24c035f2
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
15 additions
and
16 deletions
+15
-16
.github/workflows/pr-test-amd.yml
.github/workflows/pr-test-amd.yml
+0
-5
python/sglang/srt/layers/attention/aiter_backend.py
python/sglang/srt/layers/attention/aiter_backend.py
+1
-0
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+4
-0
scripts/amd_ci_install_dependency.sh
scripts/amd_ci_install_dependency.sh
+1
-0
test/srt/test_bench_one_batch.py
test/srt/test_bench_one_batch.py
+4
-1
test/srt/test_bench_serving.py
test/srt/test_bench_serving.py
+3
-3
test/srt/test_eval_accuracy_large.py
test/srt/test_eval_accuracy_large.py
+0
-5
test/srt/test_full_deepseek_v3.py
test/srt/test_full_deepseek_v3.py
+2
-2
No files found.
.github/workflows/pr-test-amd.yml
View file @
7a5e6ce1
...
@@ -138,11 +138,6 @@ jobs:
...
@@ -138,11 +138,6 @@ jobs:
run
:
|
run
:
|
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
-
name
:
Benchmark online latency (EAGLE)
timeout-minutes
:
15
run
:
|
bash scripts/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
performance-test-1-gpu-part-2-amd
:
performance-test-1-gpu-part-2-amd
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft ==
false
github.event.pull_request.draft ==
false
...
...
python/sglang/srt/layers/attention/aiter_backend.py
View file @
7a5e6ce1
...
@@ -506,6 +506,7 @@ class AiterIndicesUpdaterPrefill:
...
@@ -506,6 +506,7 @@ class AiterIndicesUpdaterPrefill:
spec_info
.
generate_attn_arg_prefill
(
spec_info
.
generate_attn_arg_prefill
(
req_pool_indices
,
req_pool_indices
,
paged_kernel_lens
,
paged_kernel_lens
,
None
,
self
.
req_to_token
,
self
.
req_to_token
,
)
)
)
)
...
...
python/sglang/srt/model_executor/model_runner.py
View file @
7a5e6ce1
...
@@ -412,6 +412,10 @@ class ModelRunner:
...
@@ -412,6 +412,10 @@ class ModelRunner:
if
not
server_args
.
disable_chunked_prefix_cache
:
if
not
server_args
.
disable_chunked_prefix_cache
:
logger
.
info
(
"Chunked prefix cache is turned on."
)
logger
.
info
(
"Chunked prefix cache is turned on."
)
if
server_args
.
attention_backend
==
"aiter"
:
if
self
.
model_config
.
context_len
>
8192
:
self
.
mem_fraction_static
*=
0.85
def
init_torch_distributed
(
self
):
def
init_torch_distributed
(
self
):
logger
.
info
(
"Init torch distributed begin."
)
logger
.
info
(
"Init torch distributed begin."
)
...
...
scripts/amd_ci_install_dependency.sh
View file @
7a5e6ce1
...
@@ -5,6 +5,7 @@ set -euo pipefail
...
@@ -5,6 +5,7 @@ set -euo pipefail
docker
exec
ci_sglang pip
install
--upgrade
pip
docker
exec
ci_sglang pip
install
--upgrade
pip
docker
exec
ci_sglang pip uninstall sgl-kernel
-y
||
true
docker
exec
ci_sglang pip uninstall sgl-kernel
-y
||
true
docker
exec
-w
/sglang-checkout/sgl-kernel ci_sglang bash
-c
"rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
docker
exec
-w
/sglang-checkout/sgl-kernel ci_sglang bash
-c
"rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
docker
exec
ci_sglang pip
install
-e
"python[dev_hip]"
docker
exec
-w
/ ci_sglang git clone https://github.com/merrymercy/human-eval.git
docker
exec
-w
/ ci_sglang git clone https://github.com/merrymercy/human-eval.git
docker
exec
-w
/human-eval ci_sglang pip
install
-e
.
docker
exec
-w
/human-eval ci_sglang pip
install
-e
.
...
...
test/srt/test_bench_one_batch.py
View file @
7a5e6ce1
...
@@ -62,7 +62,10 @@ class TestBenchOneBatch(CustomTestCase):
...
@@ -62,7 +62,10 @@ class TestBenchOneBatch(CustomTestCase):
f
"### test_torch_compile_tp2_bs1 (Mixtral-8x7B)
\n
"
f
"### test_torch_compile_tp2_bs1 (Mixtral-8x7B)
\n
"
f
"output_throughput:
{
output_throughput
:.
2
f
}
token/s
\n
"
f
"output_throughput:
{
output_throughput
:.
2
f
}
token/s
\n
"
)
)
self
.
assertGreater
(
output_throughput
,
220
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
output_throughput
,
200
)
else
:
self
.
assertGreater
(
output_throughput
,
220
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
test/srt/test_bench_serving.py
View file @
7a5e6ce1
...
@@ -32,7 +32,7 @@ class TestBenchServing(CustomTestCase):
...
@@ -32,7 +32,7 @@ class TestBenchServing(CustomTestCase):
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
)
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
res
[
"output_throughput"
],
35
0
0
)
self
.
assertGreater
(
res
[
"output_throughput"
],
3
1
50
)
else
:
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
3800
)
self
.
assertGreater
(
res
[
"output_throughput"
],
3800
)
...
@@ -70,7 +70,7 @@ class TestBenchServing(CustomTestCase):
...
@@ -70,7 +70,7 @@ class TestBenchServing(CustomTestCase):
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
)
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
res
[
"output_throughput"
],
3
5
00
)
self
.
assertGreater
(
res
[
"output_throughput"
],
30
5
0
)
else
:
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
3800
)
self
.
assertGreater
(
res
[
"output_throughput"
],
3800
)
...
@@ -126,7 +126,7 @@ class TestBenchServing(CustomTestCase):
...
@@ -126,7 +126,7 @@ class TestBenchServing(CustomTestCase):
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
f
'Output throughput:
{
res
[
"output_throughput"
]:.
2
f
}
token/s
\n
'
)
)
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
self
.
assertGreater
(
res
[
"output_throughput"
],
40
00
)
self
.
assertGreater
(
res
[
"output_throughput"
],
35
00
)
else
:
else
:
self
.
assertGreater
(
res
[
"output_throughput"
],
4300
)
self
.
assertGreater
(
res
[
"output_throughput"
],
4300
)
...
...
test/srt/test_eval_accuracy_large.py
View file @
7a5e6ce1
...
@@ -37,11 +37,6 @@ class TestEvalAccuracyLarge(CustomTestCase):
...
@@ -37,11 +37,6 @@ class TestEvalAccuracyLarge(CustomTestCase):
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
kill_process_tree
(
cls
.
process
.
pid
)
kill_process_tree
(
cls
.
process
.
pid
)
def
tearDown
(
self
):
# Delay between tests to allow GPU memory cleanup
if
os
.
getenv
(
"SGLANG_AMD_CI"
)
==
"1"
:
time
.
sleep
(
180
)
def
test_mmlu
(
self
):
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
args
=
SimpleNamespace
(
base_url
=
self
.
base_url
,
base_url
=
self
.
base_url
,
...
...
test/srt/test_full_deepseek_v3.py
View file @
7a5e6ce1
...
@@ -90,9 +90,9 @@ class TestDeepseekV3MTP(CustomTestCase):
...
@@ -90,9 +90,9 @@ class TestDeepseekV3MTP(CustomTestCase):
"2"
,
"2"
,
"--speculative-num-draft-tokens"
,
"--speculative-num-draft-tokens"
,
"4"
,
"4"
,
"--mem-fraction-static"
,
"0.7"
,
]
]
if
os
.
environ
.
get
(
"SGLANG_AMD_CI"
)
!=
"1"
:
other_args
+=
[
"--mem-frac"
,
"0.7"
]
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment