Unverified Commit 15f1a49d authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Update CI workflows (#1210)

parent 308d0240
......@@ -20,7 +20,7 @@ concurrency:
jobs:
accuracy-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy
runs-on: accuracy-test
steps:
- name: Checkout code
......@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
......@@ -40,7 +37,7 @@ jobs:
pip install -e .
- name: Evaluate Accuracy
timeout-minutes: 20
run: |
cd test/srt
python3 test_eval_accuracy_large.py
timeout-minutes: 20
......@@ -20,7 +20,7 @@ concurrency:
jobs:
e2e-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: e2e
runs-on: e2e-test
steps:
- name: Checkout code
......@@ -28,27 +28,24 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o ChunkedPrefill)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
timeout-minutes: 10
......@@ -20,7 +20,7 @@ concurrency:
jobs:
moe-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy
runs-on: moe-test
steps:
- name: Checkout code
......@@ -28,20 +28,18 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark MOE Serving Throughput
uses: nick-fields/retry@v3
with:
timeout_minutes: 15
max_attempts: 2
retry_on: error
command: |
- name: Benchmark MoE Serving Throughput
timeout_minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark MoE Serving Throughput (w/o RadixAttention)
timeout_minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
......@@ -20,7 +20,7 @@ concurrency:
jobs:
unit-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: unit
runs-on: unit-test
steps:
- name: Checkout code
......@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
......@@ -38,13 +35,13 @@ jobs:
pip install sentence_transformers
- name: Test Backend Runtime
timeout-minutes: 20
run: |
cd test/srt
python3 run_suite.py --suite minimal
timeout-minutes: 20
- name: Test Frontend Language
timeout-minutes: 10
run: |
cd test/lang
python3 run_suite.py --suite minimal
timeout-minutes: 10
......@@ -24,6 +24,7 @@ import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from sglang.srt.server import Runtime
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
DEFAULT_PROMPTS = [
# the output of gemma-2-2b from SRT is unstable on the commented prompt
......@@ -171,7 +172,7 @@ class SRTRunner:
torch_dtype,
is_generation,
tp_size=1,
port=5157,
port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
):
self.is_generation = is_generation
self.runtime = Runtime(
......
......@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
......
......@@ -5,7 +5,11 @@ from multiprocessing import Process
import requests
from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST,
popen_launch_server,
)
class TestBatchPenalizerE2E(unittest.TestCase):
......@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = f"http://127.0.0.1:{8157}"
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
cls.process = popen_launch_server(
cls.model,
cls.base_url,
......
......@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content
assert isinstance(text, str)
assert "car" in text or "taxi" in text, text
assert "logo" in text, text
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
......
Markdown is supported
0% Loading — Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment