"vscode:/vscode.git/clone" did not exist on "b725ee53d72bd2e3451b9a7718dc1b0d17ee47d0"
Unverified Commit 7f6c690b authored by Yineng Zhang's avatar Yineng Zhang Committed by GitHub
Browse files

misc: use pip cache purge and add unit test ci (#871)

parent 40e6f513
...@@ -16,7 +16,7 @@ concurrency: ...@@ -16,7 +16,7 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
gpu-job: pr-e2e-test:
runs-on: self-hosted runs-on: self-hosted
env: env:
CUDA_VISIBLE_DEVICES: 6 CUDA_VISIBLE_DEVICES: 6
...@@ -27,20 +27,17 @@ jobs: ...@@ -27,20 +27,17 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
cd /data/zhyncs/venv && source ./bin/activate && cd -
pip cache purge
pip install --upgrade pip pip install --upgrade pip
pip install -e "python[all]" pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
pip install --upgrade transformers pip install --upgrade transformers
- name: Test OpenAI Backend
run: |
export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
cd test/lang
python3 test_openai_backend.py
- name: Benchmark Serving - name: Benchmark Serving
run: | run: |
python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & cd /data/zhyncs/venv && source ./bin/activate && cd -
python3 -m sglang.launch_server --model /data/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
echo "Waiting for server to start..." echo "Waiting for server to start..."
for i in {1..120}; do for i in {1..120}; do
...@@ -55,7 +52,7 @@ jobs: ...@@ -55,7 +52,7 @@ jobs:
sleep 1 sleep 1
done done
cd /home/lmzheng/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 cd /data/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512
echo "Stopping server..." echo "Stopping server..."
kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}')
# GitHub Actions workflow: runs the sglang unit tests on a self-hosted GPU runner.
name: Unit Test
# Triggers: pushes and pull requests against main that touch the Python package,
# plus manual runs via workflow_dispatch.
on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
  workflow_dispatch:
# Cancel any in-flight run for the same ref when a newer run starts.
concurrency:
  group: unit-test-${{ github.ref }}
  cancel-in-progress: true
jobs:
  unit-test:
    runs-on: self-hosted
    env:
      # Restrict the job to a single GPU on the shared self-hosted machine.
      CUDA_VISIBLE_DEVICES: 6
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install dependencies
        run: |
          # Activate the pre-provisioned virtualenv on the runner, then return
          # to the checkout directory (each `run` step starts a fresh shell).
          cd /data/zhyncs/venv && source ./bin/activate && cd -
          # Clear pip's download cache so installs fetch fresh wheels.
          pip cache purge
          pip install --upgrade pip
          # Editable install of the local package with all extras.
          pip install -e "python[all]"
          # flashinfer comes from a custom wheel index; --force-reinstall
          # replaces any previously installed build.
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
          pip install --upgrade transformers
      - name: Test OpenAI Backend
        run: |
          # Re-activate the virtualenv (fresh shell per step), then run the
          # OpenAI-backend test with the API key injected from repo secrets.
          cd /data/zhyncs/venv && source ./bin/activate && cd -
          cd test/lang
          export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
          python3 test_openai_backend.py
# Code Structure # Code Structures
- `lang`: The frontend language. - `lang`: The frontend language.
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime). - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment