sglang · Commit 15f1a49d

Update CI workflows (#1210)

Authored Aug 25, 2024 by Lianmin Zheng; committed via GitHub on Aug 25, 2024.
Parent: 308d0240

Showing 8 changed files with 43 additions and 48 deletions (+43 −48).
.github/workflows/accuracy-test.yml                                  +2   -5
.github/workflows/e2e-test.yml                                       +4   -7
.github/workflows/moe-test.yml                                       +24  -26
.github/workflows/unit-test.yml                                      +3   -6
python/sglang/test/runners.py                                        +2   -1
python/sglang/test/test_utils.py                                     +1   -0
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py    +6   -2
test/srt/test_vision_openai_server.py                                +1   -1
.github/workflows/accuracy-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   accuracy-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test
     steps:
     - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
     - name: Install dependencies
       run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
         pip install --upgrade pip
         pip install -e "python[all]"
         pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
         pip install -e .

     - name: Evaluate Accuracy
+      timeout-minutes: 20
       run: |
         cd test/srt
         python3 test_eval_accuracy_large.py
-      timeout-minutes: 20
.github/workflows/e2e-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   e2e-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test
     steps:
     - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
     - name: Install dependencies
       run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
         pip install --upgrade pip
         pip install -e "python[all]"
         pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

     - name: Benchmark Serving Throughput
+      timeout-minutes: 10
       run: |
         cd test/srt
         python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-      timeout-minutes: 10

     - name: Benchmark Serving Throughput (w/o RadixAttention)
+      timeout-minutes: 10
       run: |
         cd test/srt
         python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-      timeout-minutes: 10

     - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+      timeout-minutes: 10
       run: |
         cd test/srt
         python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-      timeout-minutes: 10
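
Each benchmark step above selects a single test method with python3 -m unittest module.Class.method. For reference, a minimal Python equivalent of that selection (a sketch; it assumes the working directory is test/srt so the module resolves):

    import unittest

    # Load exactly one test method by dotted name, mirroring the CLI invocation above.
    suite = unittest.defaultTestLoader.loadTestsFromName(
        "test_serving_throughput.TestServingThroughput.test_default"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)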
.github/workflows/moe-test.yml

@@ -18,30 +18,28 @@ concurrency:
   cancel-in-progress: true

 jobs:
   moe-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v3
-    - name: Install dependencies
-      run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
-        pip install --upgrade pip
-        pip install -e "python[all]"
-        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-    - name: Benchmark MOE Serving Throughput
-      uses: nick-fields/retry@v3
-      with:
-        timeout_minutes: 15
-        max_attempts: 2
-        retry_on: error
-        command: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+    runs-on: moe-test
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install -e "python[all]"
+        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+    - name: Benchmark MoE Serving Throughput
+      timeout_minutes: 10
+      run: |
+        cd test/srt
+        python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+    - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+      timeout_minutes: 10
+      run: |
+        cd test/srt
+        python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
.github/workflows/unit-test.yml

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   unit-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test
     steps:
     - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
     - name: Install dependencies
       run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
         pip install --upgrade pip
         pip install -e "python[all]"
         pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
         pip install sentence_transformers

     - name: Test Backend Runtime
+      timeout-minutes: 20
       run: |
         cd test/srt
         python3 run_suite.py --suite minimal
-      timeout-minutes: 20

     - name: Test Frontend Language
+      timeout-minutes: 10
       run: |
         cd test/lang
         python3 run_suite.py --suite minimal
-      timeout-minutes: 10
python/sglang/test/runners.py

@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer

 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER

 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
         torch_dtype,
         is_generation,
         tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
     ):
         self.is_generation = is_generation
         self.runtime = Runtime(
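
As a usage note, a minimal sketch of constructing the runner after this change, so the port comes from the shared constant rather than a hard-coded literal. The parameters before torch_dtype are truncated in the hunk above, so the leading model argument and the dtype value below are assumptions, not the actual signature.

    # Hedged sketch; only torch_dtype, is_generation, tp_size, and port are confirmed by the diff.
    import torch

    from sglang.test.runners import SRTRunner

    runner = SRTRunner(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",  # assumed leading model argument
        torch_dtype=torch.float16,                # dtype value assumed
        is_generation=True,
        tp_size=1,
        # port is omitted on purpose: it now defaults to DEFAULT_PORT_FOR_SRT_TEST_RUNNER
    )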
python/sglang/test/test_utils.py

@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"

 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
     DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
     DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
     DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
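
The new constant sits inside the SGLANG_IS_IN_CI branch next to the per-suite URLs, so CI runs and local runs can resolve different ports. A minimal sketch of that pattern; the else-branch values below are hypothetical, since the diff only shows the CI branch:

    import os

    if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
        # CI values, as in the hunk above.
        DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
        DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
    else:
        # Hypothetical local defaults; not shown in this diff.
        DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 1157
        DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:2157"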
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py

@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests

 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)


 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
         cls.process = popen_launch_server(
             cls.model,
             cls.base_url,
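
For orientation, a hedged sketch of the launch/teardown pattern this test follows, using only the names visible in the imports above. The trailing arguments to popen_launch_server are truncated in the hunk, so the timeout keyword and the kill_child_process call below are assumptions.

    # Hedged sketch, not the test file itself.
    import unittest

    from sglang.srt.utils import kill_child_process
    from sglang.test.test_utils import (
        DEFAULT_MODEL_NAME_FOR_TEST,
        DEFAULT_URL_FOR_UNIT_TEST,
        popen_launch_server,
    )


    class ExampleServerTest(unittest.TestCase):
        @classmethod
        def setUpClass(cls):
            cls.model = DEFAULT_MODEL_NAME_FOR_TEST
            cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
            # Remaining launch arguments are truncated in the diff above; timeout is assumed.
            cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)

        @classmethod
        def tearDownClass(cls):
            # Assumed teardown: stop the launched server process tree.
            kill_child_process(cls.process.pid)

        # Test methods would issue HTTP requests against cls.base_url here.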
test/srt/test_vision_openai_server.py

@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0
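
For context, a hedged sketch of the kind of OpenAI-compatible vision request whose response the assertions above inspect. The base URL, model name, and image URL here are placeholders, not values from the test file.

    # Hedged sketch using the OpenAI Python client against an sglang server.
    import openai

    client = openai.OpenAI(base_url="http://127.0.0.1:8157/v1", api_key="EMPTY")
    response = client.chat.completions.create(
        model="default",  # placeholder model name
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    # Placeholder image URL.
                    {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
                ],
            }
        ],
        temperature=0,
    )
    assert response.choices[0].message.role == "assistant"
    text = response.choices[0].message.content
    assert isinstance(text, str)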