Unverified commit 761b2ceb authored by Lianmin Zheng, committed by GitHub

[CI] merge all ci tests into one file (#1289)

parent 54772f78
name: Accuracy Test

on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  workflow_dispatch:

concurrency:
  group: accuracy-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  one-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate Accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py

  two-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[all]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate Accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_moe_eval_accuracy_large.py

  finish:
    needs: [one-gpu, two-gpu]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."
 name: Lint
-on: [push, pull_request]
+on: [pull_request]
 jobs:
   lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - name: Set up Python 3.8
+      - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:
-          python-version: 3.8
+          python-version: 3.9
       - name: Install pre-commit hook
         run: |
           python -m pip install pre-commit
           pre-commit install
       - name: Linting
         run: pre-commit run --all-files
-name: E2E Test
+name: Pull Request Test
 on:
   push:
@@ -14,14 +14,70 @@ on:
   workflow_dispatch:
 concurrency:
-  group: e2e-test-${{ github.ref }}
+  group: pr-test-${{ github.ref }}
   cancel-in-progress: true
 jobs:
-  one-gpu:
+  unit-test-frontend:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
     runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/lang
+          python3 run_suite.py --suite minimal
+
+  unit-test-backend-part-0:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite minimal --range-begin 0 --range-end 8
+
+  unit-test-backend-part-1:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite minimal --range-begin 8
+
+  performance-test-1-gpu:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -56,10 +112,9 @@ jobs:
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-  two-gpu:
+  performance-test-2-gpu:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -88,8 +143,58 @@ jobs:
           cd test/srt
           python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+  accuracy-test-1-gpu:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 1-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+          git clone https://github.com/merrymercy/human-eval.git
+          cd human-eval
+          pip install -e .
+      - name: Evaluate Accuracy
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 test_eval_accuracy_large.py
+
+  accuracy-test-2-gpu:
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+          git clone https://github.com/merrymercy/human-eval.git
+          cd human-eval
+          pip install -e .
+      - name: Evaluate Accuracy
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 test_moe_eval_accuracy_large.py
+
   finish:
-    needs: [one-gpu, two-gpu]
+    needs: [
+      unit-test-frontend, unit-test-backend-part-0, unit-test-backend-part-1,
+      performance-test-1-gpu, performance-test-2-gpu,
+      accuracy-test-1-gpu, accuracy-test-2-gpu
+    ]
     runs-on: ubuntu-latest
     steps:
       - name: Finish
...
name: Unit Test

on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  workflow_dispatch:

concurrency:
  group: unit-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    strategy:
      matrix:
        test_type: ['backend-0', 'backend-1', 'frontend']
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[dev]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

      - name: Run test
        timeout-minutes: 20
        run: |
          if [ "${{ matrix.test_type }}" = "frontend" ]; then
            cd test/lang
            python3 run_suite.py --suite minimal
          elif [ "${{ matrix.test_type }}" = "backend-0" ]; then
            cd test/srt
            python3 run_suite.py --suite minimal --range-begin 0 --range-end 8
          elif [ "${{ matrix.test_type }}" = "backend-1" ]; then
            cd test/srt
            python3 run_suite.py --suite minimal --range-begin 8
          fi

  finish:
    needs: [run-test]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."
@@ -2,8 +2,8 @@
 - `lang`: The frontend language.
 - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
-- `test`: Test utilities.
-- `api.py`: Public API.
+- `test`: The test utilities.
+- `api.py`: The public APIs.
 - `bench_latency.py`: Benchmark a single static batch.
 - `bench_serving.py`: Benchmark online serving with dynamic requests.
 - `global_config.py`: The global configs and constants.
...
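As background for the README entries above: `lang` and `api.py` together expose the frontend primitives. Here is a minimal usage sketch, assuming the standard sglang frontend API (`sgl.function`, `sgl.gen`, `sgl.RuntimeEndpoint`) and an SRT server already running on the default port; details may differ at this commit.

```python
# Minimal sketch of the `lang` frontend re-exported through api.py.
# Assumes an SRT server is already serving at the default port 30000.
import sglang as sgl


@sgl.function
def qa(s, question):
    s += sgl.user(question)
    s += sgl.assistant(sgl.gen("answer", max_tokens=64))


if __name__ == "__main__":
    sgl.set_default_backend(sgl.RuntimeEndpoint("http://127.0.0.1:30000"))
    state = qa.run(question="What is SGLang?")
    print(state["answer"])
```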
@@ -75,7 +75,7 @@ class TestServingThroughput(unittest.TestCase):
         )

         if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            assert res["output_throughput"] > 1850
+            assert res["output_throughput"] > 1800

     def test_default_without_radix_cache(self):
         res = self.run_test(
...
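The hunk above also illustrates a pattern worth noting: the throughput assertion is gated on SGLANG_IS_IN_CI, so the benchmark still reports numbers on arbitrary local hardware but only enforces the floor on the fixed CI runners. A minimal sketch of the pattern, using the relaxed 1800 token/s threshold from this diff:

```python
import os


def check_throughput(res: dict) -> None:
    # Always report the measurement...
    print(f"output_throughput: {res['output_throughput']:.1f} token/s")
    # ...but only enforce the floor when running on CI, where the
    # hardware (and thus the expected throughput) is fixed.
    if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
        assert res["output_throughput"] > 1800


check_throughput({"output_throughput": 1900.0})
```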