Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
03227c5f
Unverified
Commit
03227c5f
authored
May 11, 2025
by
Lianmin Zheng
Committed by
GitHub
May 11, 2025
Browse files
[CI] Reorganize the 8 gpu tests (#6192)
parent
01bdbf7f
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
15 additions
and
82 deletions
+15
-82
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+1
-19
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-0
scripts/ci_install_dependency.sh
scripts/ci_install_dependency.sh
+5
-8
test/srt/run_suite.py
test/srt/run_suite.py
+1
-3
test/srt/test_disaggregation.py
test/srt/test_disaggregation.py
+0
-2
test/srt/test_pp_single_node.py
test/srt/test_pp_single_node.py
+2
-50
No files found.
.github/workflows/pr-test.yml
View file @
03227c5f
...
@@ -92,7 +92,7 @@ jobs:
...
@@ -92,7 +92,7 @@ jobs:
unittest-test-backend-8-gpu
:
unittest-test-backend-8-gpu
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft ==
false
github.event.pull_request.draft ==
false
needs
:
[
unit-test-frontend
,
unit-test-backend-1-gpu
,
unit-test-backend-2-gpu
]
needs
:
[
unit-test-frontend
,
unit-test-backend-2-gpu
]
runs-on
:
8-gpu-runner
runs-on
:
8-gpu-runner
steps
:
steps
:
-
name
:
Checkout code
-
name
:
Checkout code
...
@@ -271,24 +271,6 @@ jobs:
...
@@ -271,24 +271,6 @@ jobs:
cd test/srt
cd test/srt
python3 test_moe_eval_accuracy_large.py
python3 test_moe_eval_accuracy_large.py
unit-test-backend-pd
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft ==
false
runs-on
:
8-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Install dependencies
run
:
|
bash scripts/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
10
run
:
|
cd test/srt
python3 -m unittest test_disaggregation.TestDisaggregationMooncake.test_gsm8k
finish
:
finish
:
if
:
always()
if
:
always()
needs
:
[
needs
:
[
...
...
python/sglang/srt/server_args.py
View file @
03227c5f
...
@@ -305,6 +305,12 @@ class ServerArgs:
...
@@ -305,6 +305,12 @@ class ServerArgs:
if
self
.
grammar_backend
is
None
:
if
self
.
grammar_backend
is
None
:
self
.
grammar_backend
=
"xgrammar"
self
.
grammar_backend
=
"xgrammar"
if
self
.
pp_size
>
1
:
self
.
disable_overlap_schedule
=
True
logger
.
warning
(
"Overlap scheduler is disabled because of using pipeline parallelism."
)
# Data parallelism attention
# Data parallelism attention
if
self
.
enable_dp_attention
:
if
self
.
enable_dp_attention
:
self
.
schedule_conservativeness
=
self
.
schedule_conservativeness
*
0.3
self
.
schedule_conservativeness
=
self
.
schedule_conservativeness
*
0.3
...
...
scripts/ci_install_dependency.sh
View file @
03227c5f
...
@@ -5,25 +5,22 @@ set -euxo pipefail
...
@@ -5,25 +5,22 @@ set -euxo pipefail
SCRIPT_DIR
=
"
$(
cd
"
$(
dirname
"
${
BASH_SOURCE
[0]
}
"
)
"
&&
pwd
)
"
SCRIPT_DIR
=
"
$(
cd
"
$(
dirname
"
${
BASH_SOURCE
[0]
}
"
)
"
&&
pwd
)
"
bash
"
${
SCRIPT_DIR
}
/killall_sglang.sh"
bash
"
${
SCRIPT_DIR
}
/killall_sglang.sh"
# Update pip
pip
install
--upgrade
pip
# Clean up existing installations
# Clean up existing installations
pip uninstall
-y
flashinfer flashinfer_python sgl-kernel sglang vllm
||
true
pip uninstall
-y
flashinfer flashinfer_python sgl-kernel sglang vllm
pip cache purge
pip cache purge
rm
-rf
/root/.cache/flashinfer
rm
-rf
/root/.cache/flashinfer
rm
-rf
/usr/local/lib/python3.10/dist-packages/flashinfer
*
rm
-rf
/usr/local/lib/python3.10/dist-packages/flashinfer
*
rm
-rf
/usr/local/lib/python3.10/dist-packages/sgl_kernel
*
rm
-rf
/usr/local/lib/python3.10/dist-packages/sgl_kernel
*
# Update pip
pip
install
--upgrade
pip
# Install sgl-kernel
pip
install
sgl-kernel
==
0.1.2.post1
--no-cache-dir
# Install the main package
# Install the main package
pip
install
-e
"python[all]"
pip
install
-e
"python[all]"
# Install additional dependencies
# Install additional dependencies
pip
install
torch_memory_saver
pip
install
torch_memory_saver
pip
install
transformers
==
4.51.0 sentence_transformers accelerate peft pandas datasets
timm
torchaudio
==
2.6.0
pip
install
transformers
==
4.51.0
timm
torchaudio
==
2.6.0
sentence_transformers accelerate peft pandas datasets
mooncake-transfer-engine
# For compiling xgrammar kernels
# For compiling xgrammar kernels
pip
install
cuda-python nvidia-cuda-nvrtc-cu12
pip
install
cuda-python nvidia-cuda-nvrtc-cu12
...
...
test/srt/run_suite.py
View file @
03227c5f
...
@@ -85,9 +85,6 @@ suites = {
...
@@ -85,9 +85,6 @@ suites = {
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
],
],
"per-commit-pd"
:
[
TestFile
(
"test_disaggregation.py"
,
90
),
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"models/lora/test_lora_tp.py"
,
116
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_data_parallelism.py"
,
73
),
...
@@ -105,6 +102,7 @@ suites = {
...
@@ -105,6 +102,7 @@ suites = {
# TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
TestFile
(
"test_local_attn.py"
,
250
),
TestFile
(
"test_local_attn.py"
,
250
),
TestFile
(
"test_disaggregation.py"
,
90
),
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
TestFile
(
"test_pp_single_node.py"
,
150
),
TestFile
(
"test_pp_single_node.py"
,
150
),
],
],
...
...
test/srt/test_disaggregation.py
View file @
03227c5f
import
subprocess
import
subprocess
import
threading
import
time
import
time
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
import
requests
import
requests
import
torch
from
sglang.srt.utils
import
kill_process_tree
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
from
sglang.test.few_shot_gsm8k
import
run_eval
as
run_eval_few_shot_gsm8k
...
...
test/srt/test_pp_single_node.py
View file @
03227c5f
...
@@ -9,13 +9,10 @@ import time
...
@@ -9,13 +9,10 @@ import time
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
import
requests
from
sglang.bench_one_batch_server
import
BenchArgs
as
OneBatchBenchArgs
from
sglang.bench_one_batch_server
import
BenchArgs
as
OneBatchBenchArgs
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.server_args
import
ServerArgs
from
sglang.srt.utils
import
kill_process_tree
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.few_shot_gsm8k
import
run_eval
from
sglang.test.few_shot_gsm8k
import
run_eval
from
sglang.test.runners
import
DEFAULT_PROMPTS
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
@@ -28,17 +25,16 @@ from sglang.test.test_utils import (
...
@@ -28,17 +25,16 @@ from sglang.test.test_utils import (
class
TestPPAccuracy
(
unittest
.
TestCase
):
class
TestPPAccuracy
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
# These config helps find a leak.
os
.
environ
[
"SGLANG_IS_IN_CI"
]
=
"1"
cls
.
base_url
=
"http://127.0.0.1:23333"
cls
.
base_url
=
"http://127.0.0.1:23333"
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
other_args
=
[
"--tp-size"
,
2
,
"--pp-size"
,
"--pp-size"
,
4
,
4
,
"--disable-overlap-schedule"
,
"--chunked-prefill-size"
,
"--chunked-prefill-size"
,
256
,
256
,
],
],
...
@@ -66,49 +62,6 @@ class TestPPAccuracy(unittest.TestCase):
...
@@ -66,49 +62,6 @@ class TestPPAccuracy(unittest.TestCase):
time
.
sleep
(
5
)
time
.
sleep
(
5
)
# class TestPPAccuracyFlashInfer(unittest.TestCase):
# @classmethod
# def setUpClass(cls):
# # These config helps find a leak.
# os.environ["SGLANG_IS_IN_CI"] = "1"
# cls.base_url = "http://127.0.0.1:23333"
# cls.process = popen_launch_server(
# DEFAULT_MODEL_NAME_FOR_TEST,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--pp-size",
# 4,
# "--disable-overlap-schedule",
# "--attention-backend",
# "flashinfer",
# "--chunked-prefill-size",
# 256,
# ],
# )
#
# @classmethod
# def tearDownClass(cls):
# kill_process_tree(cls.process.pid)
#
# def test_gsm8k(self):
# args = SimpleNamespace(
# num_shots=5,
# data_path=None,
# num_questions=200,
# max_new_tokens=512,
# parallel=128,
# host="http://127.0.0.1",
# port=int(self.base_url.split(":")[-1]),
# )
# metrics = run_eval(args)
# print(f"{metrics=}")
#
# self.assertGreater(metrics["accuracy"], 0.75)
# # Wait a little bit so that the memory check happens.
# time.sleep(5)
class
TestFixedBugs
(
unittest
.
TestCase
):
class
TestFixedBugs
(
unittest
.
TestCase
):
def
test_chunked_prefill_with_small_bs
(
self
):
def
test_chunked_prefill_with_small_bs
(
self
):
model
=
DEFAULT_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MODEL_NAME_FOR_TEST
...
@@ -124,7 +77,6 @@ class TestFixedBugs(unittest.TestCase):
...
@@ -124,7 +77,6 @@ class TestFixedBugs(unittest.TestCase):
2
,
2
,
"--pp-size"
,
"--pp-size"
,
2
,
2
,
"--disable-overlap-schedule"
,
"--chunked-prefill"
,
"--chunked-prefill"
,
256
,
256
,
"--max-running-requests"
,
"--max-running-requests"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment