change / sglang · Commits · 15f1a49d

Update CI workflows (#1210)

Unverified commit 15f1a49d, authored by Lianmin Zheng on Aug 25, 2024 and committed by GitHub on Aug 25, 2024.
Parent: 308d0240
Showing 8 changed files with 43 additions and 48 deletions (+43, -48).
.github/workflows/accuracy-test.yml                                 +2  -5
.github/workflows/e2e-test.yml                                      +4  -7
.github/workflows/moe-test.yml                                      +24 -26
.github/workflows/unit-test.yml                                     +3  -6
python/sglang/test/runners.py                                       +2  -1
python/sglang/test/test_utils.py                                    +1  -0
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py   +6  -2
test/srt/test_vision_openai_server.py                               +1  -1
.github/workflows/accuracy-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   accuracy-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
           pip install -e .

       - name: Evaluate Accuracy
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 test_eval_accuracy_large.py
-        timeout-minutes: 20
.github/workflows/e2e-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   e2e-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test
     steps:
       - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

       - name: Benchmark Serving Throughput
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-        timeout-minutes: 10

       - name: Benchmark Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-        timeout-minutes: 10

       - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-        timeout-minutes: 10
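
The benchmark steps above simply select individual unittest cases. As an illustrative sketch (not part of the commit), the same selection can be run programmatically, assuming it is executed from test/srt in a checkout with sglang installed:

    # hypothetical helper script, not in the repository
    import unittest

    # Load exactly the test case the e2e workflow invokes via `python3 -m unittest ...`.
    suite = unittest.defaultTestLoader.loadTestsFromName(
        "test_serving_throughput.TestServingThroughput.test_default"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)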
.github/workflows/moe-test.yml

@@ -18,30 +18,28 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  moe-test:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-      - name: Benchmark MOE Serving Throughput
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 15
-          max_attempts: 2
-          retry_on: error
-          command: |
-            cd test/srt
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+  moe-test:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: moe-test
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Benchmark MoE Serving Throughput
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+
+      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
.github/workflows/unit-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   unit-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
           pip install sentence_transformers

       - name: Test Backend Runtime
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 run_suite.py --suite minimal
-        timeout-minutes: 20

       - name: Test Frontend Language
+        timeout-minutes: 10
         run: |
           cd test/lang
           python3 run_suite.py --suite minimal
-        timeout-minutes: 10
python/sglang/test/runners.py

@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer

 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER

 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
         torch_dtype,
         is_generation,
         tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
     ):
         self.is_generation = is_generation
         self.runtime = Runtime(
python/sglang/test/test_utils.py

@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"

 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
     DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
     DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
     DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
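
The hunk above only shows the CI branch of this switch. A minimal standalone sketch of the pattern it follows (the non-CI fallback values below are placeholders for illustration, not taken from the diff):

    import os

    # Placeholder non-CI defaults, for illustration only; the real fallback values
    # live outside the diff context shown above.
    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 30000
    DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:30000"

    # CI branch, as in the hunk: each job type gets its own fixed endpoint
    # (6157 / 7157 / 8157 for MoE, accuracy, and unit tests above).
    if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
        DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
        DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"

    print(DEFAULT_PORT_FOR_SRT_TEST_RUNNER, DEFAULT_URL_FOR_UNIT_TEST)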
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py

@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests

 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)


 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
         cls.process = popen_launch_server(
             cls.model,
             cls.base_url,
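
As an aside, the shared constant used above is a plain URL string; an illustrative sketch (not part of the commit) of pulling the host and port back out of it with the standard library, should a test need them separately:

    from urllib.parse import urlparse

    from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST

    # In CI this is http://127.0.0.1:8157, per the test_utils.py hunk above.
    parsed = urlparse(DEFAULT_URL_FOR_UNIT_TEST)
    print(parsed.hostname, parsed.port)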
test/srt/test_vision_openai_server.py

@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0