name: PR Test (SMG)

# Runs the sgl-router build, lint, unit, integration and E2E checks.
# Triggered by pushes to main and pull requests that touch sgl-router/**,
# or manually through workflow_dispatch.
on:
  push:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  pull_request:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    types: [synchronize, labeled]
  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: pr-test-rust-${{ github.ref }}
  cancel-in-progress: true

env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"

jobs:
  # Builds the Python wheel with maturin and smoke-tests the installed package.
  maturin-build-test:
    # On pull requests, only run when the PR carries the 'run-ci' label.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      - name: Move sgl-router folder to root
        run: |
          mv sglang-repo/sgl-router/* .
          rm -rf sglang-repo

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install protoc and dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget unzip gcc g++ perl make
          cd /tmp
          wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip
          sudo unzip protoc-32.0-linux-x86_64.zip -d /usr/local
          rm protoc-32.0-linux-x86_64.zip
          protoc --version

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Test maturin build
        uses: PyO3/maturin-action@v1
        with:
          args: --release --out dist --features vendored-openssl
          rust-toolchain: stable
          sccache: true

      - name: List built wheel
        run: ls -lh dist/

      - name: Test wheel install
        run: |
          pip install dist/*.whl
          python -c "import sglang_router; print('Python package: OK')"
          python -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"

  # Lint, format check, Rust unit tests and a quick benchmark sanity run.
  unit-test-rust:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo clippy --all-targets --all-features -- -D warnings

      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Install the nightly toolchain first so the rustfmt component
          # has a toolchain to be added to.
          rustup toolchain install nightly --profile minimal
          rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
          cargo +nightly fmt -- --check

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo test

      - name: Check benchmark compilation
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo check --benches

      - name: Quick benchmark sanity check
        timeout-minutes: 15
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Run quick benchmarks to ensure they work using Python script
          python3 scripts/run_benchmarks.py --quick

      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

  # Python unit, integration and E2E tests against the built binding.
  pytest-rust:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: 4-gpu-a10
    timeout-minutes: 32
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-router
          pip install maturin
          maturin build --release --out dist --features vendored-openssl
          pip install --force-reinstall dist/*.whl

      - name: Run Python unit tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          pip install pytest pytest-cov pytest-xdist
          pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80

      - name: Run Python integration tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          # Integration tests use FastAPI/uvicorn for mock workers
          pip install fastapi uvicorn orjson
          pytest -q -m integration

      - name: Run Python E2E tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
          python3 -m pip --no-cache-dir install --upgrade --break-system-packages genai-bench==0.0.2
          pytest -m e2e -s -vv -o log_cli=true --log-cli-level=INFO

      # Consumed by the summarize-benchmarks job below.
      - name: Upload benchmark results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-router/benchmark_**/

  # E2E response-API and gRPC tests; starts an Oracle XE container first.
  pytest-rust-2:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: 4-gpu-a10
    timeout-minutes: 32
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.10.0"

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          cache-all-crates: true
          cache-on-failure: true

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/ci_install_dependency.sh

      - name: Setup Oracle Instant Client
        run: |
          sudo apt-get install -y unzip
          INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
          INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"

          # Download only when the unpacked client directory is missing.
          if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
            echo "Downloading Oracle Instant Client..."
            mkdir -p "$INSTANT_CLIENT_DIR"
            cd "$INSTANT_CLIENT_DIR"
            wget "https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP"
            unzip "$INSTANT_CLIENT_ZIP"
            rm "$INSTANT_CLIENT_ZIP"
          else
            echo "Oracle Instant Client already exists, skipping download"
          fi

          # GITHUB_ENV entries are stored verbatim (no shell expansion), so the
          # current LD_LIBRARY_PATH must be expanded here. The ':+' form avoids
          # a trailing ':' when the variable is unset or empty.
          echo "LD_LIBRARY_PATH=$INSTANT_CLIENT_DIR/instantclient_23_9${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Start Oracle Database
        run: |
          docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
          echo "Starting Oracle DB..."

          # Export Oracle connection environment variables
          echo "ATP_USER=system" >> "$GITHUB_ENV"
          echo "ATP_PASSWORD=oracle" >> "$GITHUB_ENV"
          echo "ATP_DSN=localhost:1521/XEPDB1" >> "$GITHUB_ENV"

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-router
          pip install maturin
          maturin build --release --out dist --features vendored-openssl
          pip install --force-reinstall dist/*.whl

      - name: Run Python E2E response API tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          SHOW_ROUTER_LOGS=1 pytest py_test/e2e_response_api -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Run Python E2E gRPC tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          SHOW_ROUTER_LOGS=1 ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest py_test/e2e_grpc -s -vv -o log_cli=true --log-cli-level=INFO

      # Always remove the container, even when earlier steps failed.
      - name: Cleanup Oracle Database
        if: always()
        run: |
          docker stop oracle-db || true
          docker rm oracle-db || true

  # Aggregation job that depends on every test job.
  # NOTE(review): with no explicit `if:`, this job is skipped whenever a needed
  # job fails or is skipped -- confirm that is the intended gating behavior.
  finish:
    needs: [maturin-build-test, unit-test-rust, pytest-rust, pytest-rust-2]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."

  # Renders the genai-bench artifact from pytest-rust into the step summary.
  summarize-benchmarks:
    needs: pytest-rust
    runs-on: ubuntu-latest
    if: success()
    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq bc

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10

      - name: Create benchmark summary
        run: |
          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d || true
          echo "=========================================="

          # Format a value with the given printf format; when the value is not
          # numeric (e.g. jq printed "null"), emit the raw value instead.
          # Capturing printf's output first prevents its partial "0.00" from
          # being concatenated with the fallback text.
          fmt_num() {
            local formatted
            if formatted=$(printf "$1" "$2" 2>/dev/null); then
              echo "$formatted"
            else
              echo "$2"
            fi
          }

          {
            echo "## Router E2E Genai-Bench Results Summary"
            echo ""
            echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)."
            echo ""
            echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |"
            echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|"
          } >> "$GITHUB_STEP_SUMMARY"

          # One "label|directory pattern" pair per line.
          scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'

          echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
            [ -z "$label" ] && continue

            # Find the result folder (handle different extraction layouts)
            result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)

            if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
              json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)

              if [ -n "$json_file" ] && [ -f "$json_file" ]; then
                ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
                e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
                input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
                output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")

                ttft_display=$(fmt_num "%.2f" "$ttft_mean")
                e2e_display=$(fmt_num "%.2f" "$e2e_latency_mean")
                input_display=$(fmt_num "%.0f" "$input_throughput_mean")
                output_display=$(fmt_num "%.0f" "$output_throughput_mean")

                echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> "$GITHUB_STEP_SUMMARY"

                # Optional GPU utilization table if monitor output exists
                gpu_json="$result_folder/gpu_utilization.json"
                if [ -f "$gpu_json" ]; then
                  overall_mean=$(jq -r '.overall.mean // 0' "$gpu_json")
                  printf "\n#### GPU Utilization — %s\n\n" "$label" >> "$GITHUB_STEP_SUMMARY"
                  printf "Overall mean: %.2f%%\n\n" "$overall_mean" >> "$GITHUB_STEP_SUMMARY"
                  echo "| GPU | Mean (%) | p5 | p10 | p25 | p50 | p75 | p90 | p95 |" >> "$GITHUB_STEP_SUMMARY"
                  echo "|-----|----------|----|-----|-----|-----|-----|-----|-----|" >> "$GITHUB_STEP_SUMMARY"
                  # Missing per-GPU fields default to 0 so printf always gets numbers.
                  jq -r '
                    .per_gpu
                    | to_entries[]
                    | [ .key,
                        (.value.mean // 0),
                        (.value.p5 // 0),
                        (.value.p10 // 0),
                        (.value.p25 // 0),
                        (.value.p50 // 0),
                        (.value.p75 // 0),
                        (.value.p90 // 0),
                        (.value.p95 // 0)
                      ]
                    | @tsv' "$gpu_json" \
                  | while IFS=$'\t' read -r gpu m p5 p10 p25 p50 p75 p90 p95; do
                      printf "| %s | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f | %.2f |\n" \
                        "$gpu" "$m" "$p5" "$p10" "$p25" "$p50" "$p75" "$p90" "$p95" >> "$GITHUB_STEP_SUMMARY"
                    done
                  echo "" >> "$GITHUB_STEP_SUMMARY"
                fi
              fi
            fi
          done