# End-to-end serving-throughput benchmarks for the server runtime tests in
# test/srt. Runs three unittest cases from test_serving_throughput on a
# dedicated runner.
name: E2E Test

# Trigger on pushes and pull requests against main that touch the Python
# package or the tests, plus manual dispatch from the Actions UI.
on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/**"
      - "test/**"
  pull_request:
    branches:
      - main
    paths:
      - "python/sglang/**"
      - "test/**"
  workflow_dispatch:

# One active run per ref: a newer run for the same ref cancels the older one.
concurrency:
  group: e2e-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e-test:
    # Run in the upstream repository for any event; elsewhere, only for
    # pull_request events.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    # Targets the runner(s) carrying the `e2e` label.
    runs-on: e2e

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          # Uses a virtualenv expected to already exist at $HOME/venv on the
          # runner; appending to GITHUB_PATH exposes its bin/ to later steps.
          source $HOME/venv/bin/activate
          echo "$HOME/venv/bin" >> $GITHUB_PATH

          pip install --upgrade pip
          pip install -e "python[all]"
          # flashinfer is pulled from its own wheel index (the URL selects the
          # cu121 / torch2.4 builds) and force-reinstalled on every run.
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

      - name: Benchmark Serving Throughput
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
        timeout-minutes: 10

      - name: Benchmark Serving Throughput (w/o RadixAttention)
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
        timeout-minutes: 10

      - name: Benchmark Serving Throughput (w/ ChunkedPrefill)
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_with_chunked_prefill
        timeout-minutes: 10