"vscode:/vscode.git/clone" did not exist on "35f68a09f94b2d7afb3f6adc2ba850216413f28e"
Unverified commit 15f1a49d, authored by Lianmin Zheng, committed by GitHub

Update CI workflows (#1210)

parent 308d0240
@@ -20,7 +20,7 @@ concurrency:
 jobs:
   accuracy-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
           pip install -e .
       - name: Evaluate Accuracy
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 test_eval_accuracy_large.py
-        timeout-minutes: 20
@@ -20,7 +20,7 @@ concurrency:
 jobs:
   e2e-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test
     steps:
       - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
       - name: Benchmark Serving Throughput
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-        timeout-minutes: 10
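With the timeouts now scoped per step, one hung benchmark can no longer consume the budget of the steps after it. The three benchmarks are ordinary unittest cases; a minimal sketch of driving them from Python rather than the CLI, assuming test/srt is the current directory as the workflow's cd implies:

import unittest

# Load the three throughput benchmarks by dotted name, exactly the targets
# the workflow passes to `python3 -m unittest`.
names = [
    "test_serving_throughput.TestServingThroughput.test_default",
    "test_serving_throughput.TestServingThroughput.test_default_without_radix_cache",
    "test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill",
]
suite = unittest.defaultTestLoader.loadTestsFromNames(names)
unittest.TextTestRunner(verbosity=2).run(suite)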
@@ -18,30 +18,28 @@ concurrency:
   cancel-in-progress: true
 jobs:
   moe-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: moe-test
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-      - name: Benchmark MOE Serving Throughput
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 15
-          max_attempts: 2
-          retry_on: error
-          command: |
-            cd test/srt
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+      - name: Benchmark MoE Serving Throughput
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
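Here the nick-fields/retry@v3 wrapper is dropped: the two MoE benchmarks become separate steps with their own step-level timeouts, so a failure in the RadixAttention-off run is reported on its own instead of being folded into one retried command. For reference, a hedged Python sketch of the retry behavior the action used to provide (run_with_retry is a hypothetical helper, not project code):

import subprocess

def run_with_retry(cmd, max_attempts=2):
    # Re-run `cmd` until it exits 0, giving up after max_attempts tries;
    # mirrors the action's max_attempts / retry_on: error inputs.
    for attempt in range(1, max_attempts + 1):
        if subprocess.run(cmd).returncode == 0:
            return
        print(f"attempt {attempt}/{max_attempts} failed")
    raise RuntimeError(f"command failed after {max_attempts} attempts: {cmd}")

run_with_retry(["python3", "-m", "unittest",
                "test_moe_serving_throughput.TestServingThroughput.test_default"])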
@@ -20,7 +20,7 @@ concurrency:
 jobs:
   unit-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
           pip install sentence_transformers
       - name: Test Backend Runtime
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 run_suite.py --suite minimal
-        timeout-minutes: 20
       - name: Test Frontend Language
+        timeout-minutes: 10
         run: |
           cd test/lang
           python3 run_suite.py --suite minimal
-        timeout-minutes: 10
@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
         torch_dtype,
         is_generation,
         tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
     ):
         self.is_generation = is_generation
         self.runtime = Runtime(
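Defaulting port to a constant imported from sglang.test.test_utils keeps every SRTRunner instance on one agreed-upon port, and relocating that port becomes a one-line edit. A sketch of the pattern using a hypothetical stand-in class, since SRTRunner's full constructor is truncated above:

from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER

class DemoRunner:
    # Hypothetical stand-in for SRTRunner: the default flows from the shared
    # constant, but callers can still pin a different port per instance.
    def __init__(self, port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER):
        self.base_url = f"http://127.0.0.1:{port}"

print(DemoRunner().base_url)      # http://127.0.0.1:5157 when SGLANG_IS_IN_CI=true
print(DemoRunner(5158).base_url)  # explicit override for a second instance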
@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
     DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
     DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
     DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
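Each CI suite now owns a distinct port: 5157 for the SRT test runner, 6157 for MoE, 7157 for accuracy, and 8157 for unit tests, so jobs sharing a machine cannot contend for a socket. A quick sanity check of that layout (values taken from the hunk above; the non-CI fallbacks are truncated here and therefore omitted):

# CI port layout from the hunk above; one distinct port per suite.
CI_PORTS = {
    "srt_test_runner": 5157,
    "moe_test": 6157,
    "accuracy_test": 7157,
    "unit_test": 8157,
}
# No two suites share a port, so servers launched in parallel cannot collide.
assert len(set(CI_PORTS.values())) == len(CI_PORTS)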
@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests
 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)
 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
         cls.process = popen_launch_server(
             cls.model,
             cls.base_url,
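Swapping the hard-coded f"http://127.0.0.1:{8157}" for the shared constant removes a silent coupling: the old literal would have kept pointing at 8157 even after test_utils moved the unit-test port. A one-line check that the constant still matches the URL this test used to build:

from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST

# The constant equals the URL the test previously built by hand (CI value).
assert DEFAULT_URL_FOR_UNIT_TEST == "http://127.0.0.1:8157"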
@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0
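The expected keyword changes from "car"/"taxi" to "logo", which suggests the test image itself changed. Single-keyword asserts are brittle against model phrasing; a hedged sketch of a slightly more tolerant variant (assert_mentions_any is hypothetical, not the project's helper):

def assert_mentions_any(text, keywords=("logo",)):
    # Pass if the reply mentions any expected keyword; the failure message
    # includes the full text, matching the diff's `assert ..., text` style.
    assert any(k in text for k in keywords), text

assert_mentions_any("The image shows the SGLang logo.")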