Unverified commit 562f279a, authored by Zaili Wang and committed by GitHub

[CPU] enable CI for PRs, add Dockerfile and auto build task (#6458)


Co-authored-by: diwei sun <diwei.sun@intel.com>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
parent 8b247489
name: PR Test (Xeon)

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: pr-test-xeon-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-test:
    if: github.event_name == 'pull_request'
    runs-on: sgl-kernel-build-node
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon
          docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache

  unit-test:
    if: github.event_name == 'pull_request'
    needs: [build-test]
    runs-on: sgl-kernel-build-node
    steps:
      - name: Run container
        run: |
          docker run -dt \
            -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \
            --name ci_sglang_xeon \
            sglang_xeon

      - name: Install Dependency
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip"
          docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ."
          docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "pip install -e 'python[all_cpu]'"
          docker exec ci_sglang_xeon bash -c "python3 -m pip install pytest expecttest"

      - name: Check AMX Support
        id: check_amx
        timeout-minutes: 5
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"
        continue-on-error: true

      - name: Run UT Cases
        if: steps.check_amx.outcome == 'success'
        timeout-minutes: 20
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true

  finish:
    if: always()
    needs: [build-test, unit-test]
    runs-on: ubuntu-24.04
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
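For local debugging outside CI, the inline probe in the "Check AMX Support" step condenses to the following standalone script. It is a sketch that assumes the CPU build of sgl-kernel (from sgl-kernel/pyproject_cpu.toml) is installed:

# Standalone version of the CI's AMX probe (sketch; assumes a CPU build
# of sgl-kernel is installed in the current environment).
import torch
import sgl_kernel  # noqa: F401  -- importing registers torch.ops.sgl_kernel

# The optimized CPU paths require AMX tile instructions on the host.
assert torch._C._cpu._is_amx_tile_supported(), "CPU lacks AMX tile support"

# The packed-weight kernel must be present in the compiled extension.
assert hasattr(torch.ops.sgl_kernel, "convert_weight_packed"), \
    "sgl-kernel was built without convert_weight_packed"

print("AMX support OK -- per-commit-cpu suite can run")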
name: Release Docker Images

on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon
          docker build . -f docker/Dockerfile.xeon -t lmsysorg/sglang:${tag} --no-cache
          docker push lmsysorg/sglang:${tag}
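Both workflows derive the image tag by cutting the quoted version string out of python/sglang/version.py. For reference, the same extraction in Python (a sketch mirroring the shell pipeline above; run from the repository root):

# Mirror of: version=$(cat python/sglang/version.py | cut -d'"' -f2)
with open("python/sglang/version.py") as f:
    version = f.read().split('"')[1]  # text between the first pair of quotes

tag = f"v{version}-xeon"
print(f"would build and push: lmsysorg/sglang:{tag}")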
FROM ubuntu:24.04

SHELL ["/bin/bash", "-c"]

ARG VER_SGLANG=main
ARG VER_TORCH=2.6.0
ARG VER_TORCHVISION=0.21.0

RUN apt-get update && \
    apt-get full-upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        git \
        curl \
        wget \
        vim \
        gcc \
        g++ \
        make

WORKDIR /sgl-workspace

RUN curl -fsSL -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm -f miniforge.sh && \
    . miniforge3/bin/activate && \
    conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl

ENV PATH=/sgl-workspace/miniforge3/bin:/sgl-workspace/miniforge3/condabin:${PATH}
ENV PIP_ROOT_USER_ACTION=ignore

RUN pip install intel-openmp

RUN git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout ${VER_SGLANG} && \
    pip install -e "python[all_cpu]" && \
    pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} --index-url https://download.pytorch.org/whl/cpu --force-reinstall && \
    cd sgl-kernel && \
    cp pyproject_cpu.toml pyproject.toml && \
    pip install -v .

ENV LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2

WORKDIR /sgl-workspace/sglang
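One way to smoke-test a freshly built image is to run the same import chain the CI uses inside a throwaway container. A minimal sketch, assuming Docker is available, the host supports AMX, and the image was tagged sglang_xeon as in the PR workflow above:

# Smoke-test a locally built sglang_xeon image (sketch; requires Docker
# and an AMX-capable host, otherwise the asserts below will fail).
import subprocess

CHECK = (
    "import torch, sgl_kernel; "
    "assert torch._C._cpu._is_amx_tile_supported(); "
    "assert hasattr(torch.ops.sgl_kernel, 'convert_weight_packed'); "
    "print('image OK')"
)

subprocess.run(
    ["docker", "run", "--rm", "sglang_xeon", "python3", "-c", CHECK],
    check=True,  # raise CalledProcessError if the container exits non-zero
)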
@@ -89,7 +89,7 @@ srt_hpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
# CPU: currently, there are no pre-built vllm wheels for CPU.
# To install vllm for CPU, please follow the instruction here:
# https://docs.vllm.ai/en/latest/getting_started/installation/cpu/index.html
-srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "torch"]
+srt_cpu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11", "einops"]

# https://vllm-ascend.readthedocs.io/en/latest/installation.html
srt_npu = ["sglang[runtime_common]", "outlines>=0.0.44,<=0.1.11"]
@@ -26,6 +26,7 @@ from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.srt.utils import (
    get_bool_env_var,
    get_device,
    is_port_available,
    kill_process_tree,
    retry,
@@ -305,13 +306,33 @@ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
    return args


def auto_config_device() -> str:
    """Auto-configure the available device platform."""
    try:
        device = get_device()
    except (RuntimeError, ImportError) as e:
        print(f"Warning: {e} - Falling back to CPU")
        device = "cpu"
    return device


def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
    parser.add_argument("--parallel", type=int, default=64)
    parser.add_argument("--host", type=str, default="http://127.0.0.1")
    parser.add_argument("--port", type=int, default=30000)
    parser.add_argument("--backend", type=str, default="srt")
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=["auto", "cuda", "rocm", "cpu"],
        help="Device type (auto/cuda/rocm/cpu). Auto will detect available platforms",
    )
    parser.add_argument("--result-file", type=str, default="result.jsonl")
    args = parser.parse_args()
    return args
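The new --device flag resolves in two stages: argparse defaults it to "auto", and callers then map "auto" through auto_config_device(). A self-contained sketch of that flow; the get_device stub below is an assumption standing in for sglang.srt.utils.get_device, which is not shown in this diff:

import argparse

def get_device() -> str:
    # Stand-in for sglang.srt.utils.get_device (assumption: it raises
    # RuntimeError/ImportError when no accelerator runtime is available).
    import torch
    if torch.cuda.is_available():
        return "cuda"
    raise RuntimeError("no accelerator found")

def auto_config_device() -> str:
    """Same fallback logic as the function added above."""
    try:
        return get_device()
    except (RuntimeError, ImportError) as e:
        print(f"Warning: {e} - Falling back to CPU")
        return "cpu"

parser = argparse.ArgumentParser()
parser.add_argument("--device", type=str, default="auto",
                    choices=["auto", "cuda", "rocm", "cpu"])
args = parser.parse_args([])  # no CLI flags, so args.device == "auto"

device = auto_config_device() if args.device == "auto" else args.device
print(device)  # "cuda" on a GPU host, otherwise "cpu"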
@@ -397,11 +418,25 @@ def popen_launch_server(
    base_url: str,
    timeout: float,
    api_key: Optional[str] = None,
-    other_args: list[str] = (),
+    other_args: list[str] = [],
    env: Optional[dict] = None,
    return_stdout_stderr: Optional[tuple] = None,
    device: str = "auto",
    pd_separated: bool = False,
):
    """Launch a server process with automatic device detection.

    Args:
        device: Device type ("auto", "cuda", "rocm" or "cpu").
            If "auto", will detect available platforms automatically.
    """
    # Auto-detect device if needed
    if device == "auto":
        device = auto_config_device()
        print(f"Auto-configured device: {device}", flush=True)

    other_args = list(other_args)
    other_args += ["--device", str(device)]

    _, host, port = base_url.split(":")
    host = host[2:]
@@ -457,6 +492,15 @@ def popen_launch_server(
    start_time = time.perf_counter()
    with requests.Session() as session:
        while time.perf_counter() - start_time < timeout:
            return_code = process.poll()
            if return_code is not None:
                # Server failed to start (non-zero exit code) or crashed
                raise Exception(
                    f"Server process exited with code {return_code}. "
                    "Check server logs for errors."
                )
            try:
                headers = {
                    "Content-Type": "application/json; charset=utf-8",
@@ -627,6 +671,7 @@ def get_benchmark_args(
    disable_stream=False,
    disable_ignore_eos=False,
    seed: int = 0,
    device="auto",
    pd_separated: bool = False,
):
    return SimpleNamespace(
@@ -657,6 +702,7 @@ def get_benchmark_args(
        profile=None,
        lora_name=None,
        prompt_suffix="",
        device=device,
        pd_separated=pd_separated,
    )
@@ -676,7 +722,10 @@ def run_bench_serving(
    disable_ignore_eos=False,
    need_warmup=False,
    seed: int = 0,
    device="auto",
):
    if device == "auto":
        device = auto_config_device()

    # Launch the server
    base_url = DEFAULT_URL_FOR_TEST
    process = popen_launch_server(
@@ -700,6 +749,7 @@ def run_bench_serving(
        disable_stream=disable_stream,
        disable_ignore_eos=disable_ignore_eos,
        seed=seed,
        device=device,
    )

    try:
@@ -750,6 +800,18 @@ def run_bench_serving_multi(

def run_bench_one_batch(model, other_args):
    """Launch an offline benchmark process with automatic device detection.

    The device is always auto-detected here: "auto" resolves to "cuda",
    "rocm", or "cpu" depending on the available platform.
    """
    # Auto-detect device if needed
    device = auto_config_device()
    print(f"Auto-configured device: {device}", flush=True)
    other_args += ["--device", str(device)]

    command = [
        "python3",
        "-m",
@@ -127,6 +127,16 @@ suites = {
    "per-commit-8-gpu-amd": [
        TestFile("test_full_deepseek_v3.py", 250),
    ],
    "per-commit-cpu": [
        TestFile("cpu/test_activation.py"),
        TestFile("cpu/test_decode.py"),
        TestFile("cpu/test_extend.py"),
        TestFile("cpu/test_gemm.py"),
        TestFile("cpu/test_moe.py"),
        TestFile("cpu/test_norm.py"),
        TestFile("cpu/test_qkv_proj_with_rope.py"),
        TestFile("cpu/test_shared_expert.py"),
    ],
    "nightly": [
        TestFile("test_nightly_gsm8k_eval.py"),
    ],
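To reproduce the CI's CPU run locally, the new suite is invoked through the same entry point the workflow's "Run UT Cases" step uses. A minimal sketch, assuming a repository checkout with the CPU build of sgl-kernel installed (as in docker/Dockerfile.xeon):

# Run the per-commit CPU suite the same way the workflow does
# (sketch; run from the repository root).
import subprocess

subprocess.run(
    ["python3", "run_suite.py", "--suite", "per-commit-cpu"],
    cwd="test/srt",  # run_suite.py lives under test/srt
    check=True,
)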