name: Nightly Evaluation

on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

concurrency:
  group: nightly-eval-${{ github.ref }}
  cancel-in-progress: true

jobs:
  meta-llama-31-8b-instruct:
    # Only run in the upstream repo; this workflow has no pull_request trigger,
    # so an event_name check for it would never match.
    if: github.repository == 'sgl-project/sglang'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install -e "python[dev]"
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

          # Install the evaluation harness plus its API-client extras
          git clone https://github.com/EleutherAI/lm-evaluation-harness
          pushd lm-evaluation-harness
          pip install -e .
          pip install "lm_eval[api]"
          popd

      - name: Run eval
        timeout-minutes: 20
        run: |
          # Launch the server in the background
          python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache &

          # Poll the health endpoint for up to 120 seconds;
          # -f makes curl fail on non-2xx responses so an error page doesn't count as healthy
          echo "Waiting for server to start..."
          for i in {1..120}; do
            if curl -sf http://127.0.0.1:30000/health; then
              echo "Server is up!"
              break
            fi
            if [ $i -eq 120 ]; then
              echo "Server failed to start within 120 seconds"
              exit 1
            fi
            sleep 1
          done

          lm_eval --model local-completions --tasks gsm8k --model_args model=meta-llama/Meta-Llama-3.1-8B-Instruct,base_url=http://127.0.0.1:30000/v1/completions,num_concurrent=128,max_retries=3,tokenized_requests=False

          # Kill the background server process for this model
          echo "Stopping server..."
          kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}')

  finish:
    needs: [meta-llama-31-8b-instruct]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."