ci_install_dependency.sh 4.58 KB
Newer Older
1
#!/bin/bash
2
# Install the dependency in CI.
3
# Strict mode: -e aborts on the first failing command, -u treats unset
# variables as errors, -x traces every command into the CI log, and
# pipefail makes a pipeline fail when any of its stages fails.
set -euxo pipefail
Lianmin Zheng's avatar
Lianmin Zheng committed
4

5
# IS_BLACKWELL is taken from the caller's environment and defaults to 0.
# A value of 1 selects the plain-pip install path further down and skips
# the lmms-eval / xformers extras.
IS_BLACKWELL=${IS_BLACKWELL:-0}
Johnny's avatar
Johnny committed
6
7
# CUDA wheel flavor. It selects the PyTorch wheel index
# (https://download.pytorch.org/whl/<CU_VERSION>) and the NVRTC package name.
# Defaults to cu129; export CU_VERSION (e.g. cu130) to override it. Without
# the override the cu130 NVRTC branch could never be taken.
CU_VERSION="${CU_VERSION:-cu129}"

8
9
10
11
# Record the machine hardware name reported by uname (the script later
# distinguishes aarch64/arm64 from everything else) and log it.
ARCH="$(uname -m)"
printf 'Detected architecture: %s\n' "${ARCH}"

Johnny's avatar
Johnny committed
12
13
14
15
16
# Pick the NVRTC pip package matching the CUDA flavor: cu130 uses the
# unsuffixed package name, every other flavor uses the -cu12 one.
case "$CU_VERSION" in
    cu130)
        NVRTC_SPEC="nvidia-cuda-nvrtc"
        ;;
    *)
        NVRTC_SPEC="nvidia-cuda-nvrtc-cu12"
        ;;
esac
Cheng Wan's avatar
Cheng Wan committed
17

18
# Kill existing processes
# Resolve the directory holding this script so the helper one level up is
# found regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
bash "${SCRIPT_DIR}/../killall_sglang.sh"
# Log the GPU visibility setting (prints an empty value when it is unset).
printf 'CUDA_VISIBLE_DEVICES=%s\n' "${CUDA_VISIBLE_DEVICES:-}"
22

Lianmin Zheng's avatar
Lianmin Zheng committed
23
24
# Clear torch compilation cache
# The cache directory is $TORCHINDUCTOR_CACHE_DIR when set, otherwise
# <system tmpdir>/torchinductor_<current user>; a missing directory is ignored.
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
# Drop the flashinfer cache directory as well.
rm -rf /root/.cache/flashinfer
# -y is required: without it pip asks for confirmation, which cannot be
# answered on a non-interactive CI runner, and `|| true` would then hide the
# fact that nothing was uninstalled. `|| true` stays so that a pip failure
# here remains best-effort and does not abort the script.
pip3 uninstall -y flashinfer-python flashinfer-cubin flashinfer-jit-cache || true
Lianmin Zheng's avatar
Lianmin Zheng committed
27

28
# Install apt packages
# apt-get is used instead of apt: the apt front end is meant for interactive
# use and does not guarantee a stable command-line interface for scripts.
# NOTE(review): no `apt-get update` runs before this, so the runner image is
# assumed to ship usable package lists — confirm.
apt-get install -y git libnuma-dev libssl-dev pkg-config
30

31
32
33
34
35
36
37
38
39
40
41
42
# Decide whether protoc must be (re)installed.
# INSTALL_PROTOC is set to 1 when protoc is missing from PATH, or when it is
# present but cannot run (e.g. a binary built for another architecture).
if ! command -v protoc >/dev/null 2>&1; then
    INSTALL_PROTOC=1
elif ! protoc --version >/dev/null 2>&1; then
    echo "protoc found but not runnable, reinstalling..."
    INSTALL_PROTOC=1
else
    echo "protoc already installed: $(protoc --version)"
fi

43
# Install protoc for router build (gRPC protobuf compilation)
# Runs only when INSTALL_PROTOC is 1. Downloads the prebuilt protobuf release
# zip matching ${ARCH} and unpacks it into /usr/local.
if [ "${INSTALL_PROTOC:-0}" = "1" ]; then
    echo "Installing protoc..."
    if command -v apt-get &> /dev/null; then
        # Ubuntu/Debian
        apt-get update
        apt-get install -y wget unzip gcc g++ perl make
    elif command -v yum &> /dev/null; then
        # RHEL/CentOS
        yum update -y
        yum install -y wget unzip gcc gcc-c++ perl-core make
    fi

    # Determine protoc architecture
    if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
        PROTOC_ARCH="aarch_64"
    else
        PROTOC_ARCH="x86_64"
    fi
    PROTOC_ZIP="protoc-32.0-linux-${PROTOC_ARCH}.zip"

    # Download into a private scratch directory instead of cd-ing into /tmp:
    # a stale zip left by an earlier run would make wget save the new file as
    # "<name>.1" and unzip would then unpack the stale one. Using explicit
    # paths also leaves the working directory untouched.
    PROTOC_TMP_DIR="$(mktemp -d)"
    wget -O "${PROTOC_TMP_DIR}/${PROTOC_ZIP}" \
        "https://github.com/protocolbuffers/protobuf/releases/download/v32.0/${PROTOC_ZIP}"
    unzip -o "${PROTOC_TMP_DIR}/${PROTOC_ZIP}" -d /usr/local
    rm -rf -- "${PROTOC_TMP_DIR}"

    # Forget bash's remembered command locations so the verification below
    # searches PATH again rather than reusing a previously found protoc.
    hash -r
    protoc --version
else
    echo "protoc already installed: $(protoc --version)"
fi

73
74
75
76
77
78
# Choose the Python package installer and remove stale sglang installs.
# PIP_CMD and PIP_INSTALL_SUFFIX are expanded unquoted by the later install
# steps, so both must stay plain whitespace-separated strings.
if [ "$IS_BLACKWELL" != "1" ]; then
    # In normal cases, we use uv, which is much faster than pip.
    PIP_CMD="uv pip"
    PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match --prerelease allow"

    pip install --upgrade pip
    pip install uv
    # NOTE(review): presumably makes uv target the system interpreter rather
    # than a virtual environment — confirm against the uv documentation.
    export UV_SYSTEM_PYTHON=true

    # Clean up existing installations
    $PIP_CMD uninstall sgl-kernel sglang || true
else
    # The blackwell CI runner has some issues with pip and uv,
    # so we can only use pip with `--break-system-packages`
    PIP_CMD="pip"
    PIP_INSTALL_SUFFIX="--break-system-packages"
    $PIP_CMD install --upgrade pip

    # Clean up existing installations
    $PIP_CMD uninstall -y sgl-kernel sglang $PIP_INSTALL_SUFFIX || true
fi
Xiaoyu Zhang's avatar
Xiaoyu Zhang committed
95

Lianmin Zheng's avatar
Lianmin Zheng committed
96
# Install the main package
# Editable install of ./python with its "dev" extra; the PyTorch wheel index
# for the selected CUDA flavor is added as an extra index.
# PIP_CMD / PIP_INSTALL_SUFFIX are left unquoted on purpose: each holds
# several words that must be split into separate arguments.
$PIP_CMD install \
    -e "python[dev]" \
    --extra-index-url "https://download.pytorch.org/whl/${CU_VERSION}" \
    $PIP_INSTALL_SUFFIX

# Install router for pd-disagg test
$PIP_CMD install sglang-router $PIP_INSTALL_SUFFIX
101

Lianmin Zheng's avatar
Lianmin Zheng committed
102
# Install sgl-kernel
# Read two version strings:
#   - FROM_KERNEL: the text inside the quotes of the line starting with
#     `version = "` in sgl-kernel/pyproject.toml
#   - FROM_SRT: the version following the first `sgl-kernel==` pin in
#     python/pyproject.toml
SGL_KERNEL_VERSION_FROM_KERNEL="$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)"
SGL_KERNEL_VERSION_FROM_SRT="$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)"
printf 'SGL_KERNEL_VERSION_FROM_KERNEL=%s SGL_KERNEL_VERSION_FROM_SRT=%s\n' \
    "${SGL_KERNEL_VERSION_FROM_KERNEL}" "${SGL_KERNEL_VERSION_FROM_SRT}"
106

107
108
# Install sgl-kernel either from the published package (default) or, when
# CUSTOM_BUILD_SGL_KERNEL=true, from a wheel already present in
# sgl-kernel/dist.
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" != "true" ]; then
    # Published package, pinned to the version python/pyproject.toml asks for.
    $PIP_CMD install "sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT}" --force-reinstall $PIP_INSTALL_SUFFIX
else
    # List the available wheels in the CI log.
    ls -alh sgl-kernel/dist

    # Determine wheel architecture
    case "$ARCH" in
        aarch64 | arm64)
            WHEEL_ARCH="aarch64"
            ;;
        *)
            WHEEL_ARCH="x86_64"
            ;;
    esac
    $PIP_CMD install "sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_${WHEEL_ARCH}.whl" --force-reinstall $PIP_INSTALL_SUFFIX
fi

120
# Show current packages
$PIP_CMD list

# Install additional dependencies.
# "huggingface_hub[hf_xet]" is quoted so the shell cannot treat [hf_xet] as a
# glob character class and expand it against files in the working directory.
# PIP_CMD / PIP_INSTALL_SUFFIX stay unquoted because they hold several words.
$PIP_CMD install mooncake-transfer-engine==0.3.7.post2 "${NVRTC_SPEC}" py-spy scipy "huggingface_hub[hf_xet]" $PIP_INSTALL_SUFFIX
124

125
# Extras that are skipped when IS_BLACKWELL=1.
if [ "$IS_BLACKWELL" != "1" ]; then
    # For lmms_evals evaluating MMMU
    # Remove a checkout left behind by an earlier run first: git clone refuses
    # to clone into an existing non-empty directory and `set -e` would then
    # abort the whole script.
    rm -rf lmms-eval
    git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
    $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX

    # Install xformers
    # Taken from the PyTorch wheel index for the selected CUDA flavor;
    # --no-deps installs xformers alone without resolving its dependencies.
    $PIP_CMD install xformers --index-url "https://download.pytorch.org/whl/${CU_VERSION}" --no-deps $PIP_INSTALL_SUFFIX
fi

134
# Final report for the CI log: the resulting package set, followed by the
# value of torch.version.cuda for the installed torch.
$PIP_CMD list
python3 -c 'import torch; print(torch.version.cuda)'