Unverified commit 80802c4c, authored by Keyang Ru, committed by GitHub
Browse files

[router][ci] speed up python binding to 1.5 min (#12673)

parent 83b104ee
......@@ -13,7 +13,7 @@ on:
workflow_dispatch:
concurrency:
group: pr-test-rust-${{ github.ref }}
group: router-tests-${{ github.ref }}
cancel-in-progress: true
env:
......@@ -70,7 +70,7 @@ jobs:
python -c "import sglang_router; print('Python package: OK')"
python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
unit-test-rust:
router-unit-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: ubuntu-latest
steps:
......@@ -133,7 +133,7 @@ jobs:
if: always()
run: sccache --show-stats
pytest-rust:
router-http-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
......@@ -157,6 +157,16 @@ jobs:
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
......@@ -165,18 +175,17 @@ jobs:
run: |
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
pip install -e "sgl-router"
cd sgl-router
pip install maturin
maturin build --release --out dist --features vendored-openssl
pip install --force-reinstall dist/*.whl
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python unit tests
run: |
cd sgl-router
source "$HOME/.cargo/env"
pip install pytest pytest-cov pytest-xdist
python3 -m pip install pytest pytest-cov pytest-xdist
pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80
- name: Run Python integration tests
......@@ -184,16 +193,17 @@ jobs:
cd sgl-router
source "$HOME/.cargo/env"
# Integration tests use FastAPI/uvicorn for mock workers
pip install fastapi uvicorn orjson
pytest -q -m integration
python3 -m pip install fastapi uvicorn orjson
pytest -q py_test/integration_mock
- name: Run Python E2E tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
python3 -m pip --no-cache-dir install --upgrade --break-system-packages genai-bench==0.0.2
pytest -m e2e -s -vv -o log_cli=true --log-cli-level=INFO
python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
pytest py_test/e2e_http -s -vv -o log_cli=true --log-cli-level=INFO
- name: Upload benchmark results
if: success()
......@@ -202,7 +212,7 @@ jobs:
name: genai-bench-results-all-policies
path: sgl-router/benchmark_**/
pytest-rust-2:
router-grpc-response-api-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10
timeout-minutes: 32
......@@ -226,6 +236,16 @@ jobs:
cache-all-crates: true
cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies
run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
......@@ -264,20 +284,22 @@ jobs:
source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache
cd sgl-router
pip install maturin
maturin build --release --out dist --features vendored-openssl
pip install --force-reinstall dist/*.whl
python3 -m venv .venv
python3 -m pip install --upgrade pip maturin
maturin develop --release --profile ci --features vendored-openssl
- name: Run Python E2E response API tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO
- name: Run Python E2E gRPC tests
run: |
bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO
- name: Cleanup Oracle Database
......@@ -288,14 +310,14 @@ jobs:
finish:
needs: [maturin-build-test, unit-test-rust, pytest-rust, pytest-rust-2]
needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."
summarize-benchmarks:
needs: pytest-rust
needs: router-http-tests
runs-on: ubuntu-latest
if: success()
......
[run]
source = py_src/sglang_router
omit =
py_src/sglang_router/mini_lb.py
*/sglang_router/mini_lb.py
[report]
fail_under = 80
omit =
py_src/sglang_router/mini_lb.py
*/sglang_router/mini_lb.py
......@@ -129,6 +129,13 @@ lto = "fat" # Full LTO for smaller binaries
codegen-units = 1 # Better optimization, slower compile
strip = true # Strip debug symbols
# Custom Cargo profile named "ci". It is selected explicitly with
# `--profile ci` and trades some optimization for shorter compile times.
[profile.ci]
# Start from the "release" profile; any key not listed below keeps the value
# that profile defines.
inherits = "release"
opt-level = 2 # Lighter optimization (still fast runtime, much faster compile)
lto = "thin" # Thin LTO - good balance
codegen-units = 16 # More parallelization for faster builds
# Restated explicitly; presumably the same value the inherited release
# profile already sets - confirm against [profile.release].
strip = true
[profile.dev]
opt-level = 0
debug = 1
......
......@@ -43,7 +43,6 @@ High-performance model routing control and data plane for large-scale LLM deploy
- Additional guides, API references, and deployment patterns are continuously updated alongside SGLang releases.
## Installation
### Prerequisites
- **Rust and Cargo**
```bash
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment