"examples/pytorch/vscode:/vscode.git/clone" did not exist on "b1840f49fae30d6830370d36c3c6af4881c60927"
Unverified Commit 80802c4c authored by Keyang Ru's avatar Keyang Ru Committed by GitHub
Browse files

[router][ci] speed up python binding to 1.5 min (#12673)

parent 83b104ee
...@@ -13,7 +13,7 @@ on: ...@@ -13,7 +13,7 @@ on:
workflow_dispatch: workflow_dispatch:
concurrency: concurrency:
group: pr-test-rust-${{ github.ref }} group: router-tests-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
env: env:
...@@ -70,7 +70,7 @@ jobs: ...@@ -70,7 +70,7 @@ jobs:
python -c "import sglang_router; print('Python package: OK')" python -c "import sglang_router; print('Python package: OK')"
python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')" python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK" python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
unit-test-rust: router-unit-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
...@@ -133,7 +133,7 @@ jobs: ...@@ -133,7 +133,7 @@ jobs:
if: always() if: always()
run: sccache --show-stats run: sccache --show-stats
pytest-rust: router-http-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10 runs-on: 4-gpu-a10
timeout-minutes: 32 timeout-minutes: 32
...@@ -157,6 +157,16 @@ jobs: ...@@ -157,6 +157,16 @@ jobs:
cache-all-crates: true cache-all-crates: true
cache-on-failure: true cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies - name: Install SGLang dependencies
run: | run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
...@@ -165,18 +175,17 @@ jobs: ...@@ -165,18 +175,17 @@ jobs:
run: | run: |
source "$HOME/.cargo/env" source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache export RUSTC_WRAPPER=sccache
pip install -e "sgl-router"
cd sgl-router cd sgl-router
pip install maturin python3 -m venv .venv
maturin build --release --out dist --features vendored-openssl python3 -m pip install --upgrade pip maturin
pip install --force-reinstall dist/*.whl maturin develop --release --profile ci --features vendored-openssl
- name: Run Python unit tests - name: Run Python unit tests
run: | run: |
cd sgl-router cd sgl-router
source "$HOME/.cargo/env" source "$HOME/.cargo/env"
pip install pytest pytest-cov pytest-xdist python3 -m pip install pytest pytest-cov pytest-xdist
pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80 pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80
- name: Run Python integration tests - name: Run Python integration tests
...@@ -184,16 +193,17 @@ jobs: ...@@ -184,16 +193,17 @@ jobs:
cd sgl-router cd sgl-router
source "$HOME/.cargo/env" source "$HOME/.cargo/env"
# Integration tests use FastAPI/uvicorn for mock workers # Integration tests use FastAPI/uvicorn for mock workers
pip install fastapi uvicorn orjson python3 -m pip install fastapi uvicorn orjson
pytest -q -m integration pytest -q py_test/integration_mock
- name: Run Python E2E tests - name: Run Python E2E tests
run: | run: |
bash scripts/killall_sglang.sh "nuk_gpus" bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router cd sgl-router
source "$HOME/.cargo/env"
python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
python3 -m pip --no-cache-dir install --upgrade --break-system-packages genai-bench==0.0.2 python3 -m pip --no-cache-dir install --upgrade genai-bench==0.0.2
pytest -m e2e -s -vv -o log_cli=true --log-cli-level=INFO pytest py_test/e2e_http -s -vv -o log_cli=true --log-cli-level=INFO
- name: Upload benchmark results - name: Upload benchmark results
if: success() if: success()
...@@ -202,7 +212,7 @@ jobs: ...@@ -202,7 +212,7 @@ jobs:
name: genai-bench-results-all-policies name: genai-bench-results-all-policies
path: sgl-router/benchmark_**/ path: sgl-router/benchmark_**/
pytest-rust-2: router-grpc-response-api-tests:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
runs-on: 4-gpu-a10 runs-on: 4-gpu-a10
timeout-minutes: 32 timeout-minutes: 32
...@@ -226,6 +236,16 @@ jobs: ...@@ -226,6 +236,16 @@ jobs:
cache-all-crates: true cache-all-crates: true
cache-on-failure: true cache-on-failure: true
- name: Cache router build
uses: actions/cache@v4
with:
path: |
sgl-router/target
sgl-router/.venv
key: ${{ runner.os }}-router-${{ hashFiles('sgl-router/Cargo.lock', 'sgl-router/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-router-
- name: Install SGLang dependencies - name: Install SGLang dependencies
run: | run: |
sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh
...@@ -264,20 +284,22 @@ jobs: ...@@ -264,20 +284,22 @@ jobs:
source "$HOME/.cargo/env" source "$HOME/.cargo/env"
export RUSTC_WRAPPER=sccache export RUSTC_WRAPPER=sccache
cd sgl-router cd sgl-router
pip install maturin python3 -m venv .venv
maturin build --release --out dist --features vendored-openssl python3 -m pip install --upgrade pip maturin
pip install --force-reinstall dist/*.whl maturin develop --release --profile ci --features vendored-openssl
- name: Run Python E2E response API tests - name: Run Python E2E response API tests
run: | run: |
bash scripts/killall_sglang.sh "nuk_gpus" bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO
- name: Run Python E2E gRPC tests - name: Run Python E2E gRPC tests
run: | run: |
bash scripts/killall_sglang.sh "nuk_gpus" bash scripts/killall_sglang.sh "nuk_gpus"
cd sgl-router cd sgl-router
source "$HOME/.cargo/env"
SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO
- name: Cleanup Oracle Database - name: Cleanup Oracle Database
...@@ -288,14 +310,14 @@ jobs: ...@@ -288,14 +310,14 @@ jobs:
finish: finish:
needs: [maturin-build-test, unit-test-rust, pytest-rust, pytest-rust-2] needs: [maturin-build-test, router-unit-tests, router-http-tests, router-grpc-response-api-tests]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Finish - name: Finish
run: echo "This is an empty step to ensure that all jobs are completed." run: echo "This is an empty step to ensure that all jobs are completed."
summarize-benchmarks: summarize-benchmarks:
needs: pytest-rust needs: router-http-tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: success() if: success()
......
[run] [run]
source = py_src/sglang_router source = py_src/sglang_router
omit = omit =
py_src/sglang_router/mini_lb.py */sglang_router/mini_lb.py
[report] [report]
fail_under = 80 fail_under = 80
omit = omit =
py_src/sglang_router/mini_lb.py */sglang_router/mini_lb.py
...@@ -129,6 +129,13 @@ lto = "fat" # Full LTO for smaller binaries ...@@ -129,6 +129,13 @@ lto = "fat" # Full LTO for smaller binaries
codegen-units = 1 # Better optimization, slower compile codegen-units = 1 # Better optimization, slower compile
strip = true # Strip debug symbols strip = true # Strip debug symbols
[profile.ci]
inherits = "release"
opt-level = 2 # Lighter optimization (still fast runtime, much faster compile)
lto = "thin" # Thin LTO - good balance
codegen-units = 16 # More parallelization for faster builds
strip = true
[profile.dev] [profile.dev]
opt-level = 0 opt-level = 0
debug = 1 debug = 1
......
...@@ -43,7 +43,6 @@ High-performance model routing control and data plane for large-scale LLM deploy ...@@ -43,7 +43,6 @@ High-performance model routing control and data plane for large-scale LLM deploy
- Additional guides, API references, and deployment patterns are continuously updated alongside SGLang releases. - Additional guides, API references, and deployment patterns are continuously updated alongside SGLang releases.
## Installation ## Installation
### Prerequisites ### Prerequisites
- **Rust and Cargo** - **Rust and Cargo**
```bash ```bash
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment