"vscode:/vscode.git/clone" did not exist on "b725ee53d72bd2e3451b9a7718dc1b0d17ee47d0"
Unverified Commit 7f6c690b authored by Yineng Zhang's avatar Yineng Zhang Committed by GitHub
Browse files

misc: use pip cache purge and add unit test ci (#871)

parent 40e6f513
...@@ -16,7 +16,7 @@ concurrency: ...@@ -16,7 +16,7 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
gpu-job: pr-e2e-test:
runs-on: self-hosted runs-on: self-hosted
env: env:
CUDA_VISIBLE_DEVICES: 6 CUDA_VISIBLE_DEVICES: 6
...@@ -27,20 +27,17 @@ jobs: ...@@ -27,20 +27,17 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
cd /data/zhyncs/venv && source ./bin/activate && cd -
pip cache purge
pip install --upgrade pip pip install --upgrade pip
pip install -e "python[all]" pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
pip install --upgrade transformers pip install --upgrade transformers
- name: Test OpenAI Backend
run: |
export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
cd test/lang
python3 test_openai_backend.py
- name: Benchmark Serving - name: Benchmark Serving
run: | run: |
python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & cd /data/zhyncs/venv && source ./bin/activate && cd -
python3 -m sglang.launch_server --model /data/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
echo "Waiting for server to start..." echo "Waiting for server to start..."
for i in {1..120}; do for i in {1..120}; do
...@@ -55,7 +52,7 @@ jobs: ...@@ -55,7 +52,7 @@ jobs:
sleep 1 sleep 1
done done
cd /home/lmzheng/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 cd /data/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512
echo "Stopping server..." echo "Stopping server..."
kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}')
# GitHub Actions workflow: runs the sglang unit tests on a self-hosted GPU runner.
name: Unit Test
# Triggers: pushes and pull requests against main that touch the Python package,
# plus manual runs via workflow_dispatch.
on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
  workflow_dispatch:
# Cancel any in-flight run for the same ref when a newer run starts.
concurrency:
  group: unit-test-${{ github.ref }}
  cancel-in-progress: true
jobs:
  unit-test:
    runs-on: self-hosted
    env:
      # Restrict the job to a single GPU on the shared self-hosted machine.
      CUDA_VISIBLE_DEVICES: 6
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Install dependencies
        run: |
          # Activate the pre-provisioned virtualenv on the runner, then return
          # to the checkout directory (each `run` step starts a fresh shell).
          cd /data/zhyncs/venv && source ./bin/activate && cd -
          # Clear pip's download cache so installs fetch fresh wheels.
          pip cache purge
          pip install --upgrade pip
          # Editable install of the local package with all extras.
          pip install -e "python[all]"
          # flashinfer comes from a custom wheel index; --force-reinstall
          # replaces any previously installed build.
          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
          pip install --upgrade transformers
      - name: Test OpenAI Backend
        run: |
          # Re-activate the virtualenv (fresh shell per step), then run the
          # OpenAI-backend test with the API key injected from repo secrets.
          cd /data/zhyncs/venv && source ./bin/activate && cd -
          cd test/lang
          export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
          python3 test_openai_backend.py
# Code Structure # Code Structures
- `lang`: The frontend language. - `lang`: The frontend language.
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime). - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment