ci_install_dependency.sh 4.92 KB
Newer Older
1
#!/bin/bash
# Install the dependencies needed by the CI jobs.
#
# Shell options: stop at the first failing command (errexit), treat unset
# variables as errors (nounset), trace every command (xtrace) and make a
# pipeline fail when any of its stages fails (pipefail).
set -o errexit -o nounset -o xtrace -o pipefail
Lianmin Zheng's avatar
Lianmin Zheng committed
4

5
# Feature toggles taken from the environment; each falls back to "0" when it
# is unset or empty.
#   IS_BLACKWELL=1      selects the plain-pip install path further down.
#   RUN_DEEPSEEK_V32=1  additionally installs the deepseek-v3.2 dependencies.
: "${IS_BLACKWELL:=0}"
: "${RUN_DEEPSEEK_V32:=0}"
Johnny's avatar
Johnny committed
7
8
9
10
11
12
13
# CUDA flavour used to build the PyTorch wheel index URLs below.
CU_VERSION="cu129"

# Name of the NVRTC package matching that flavour: the unsuffixed package for
# cu130, the "-cu12" package for every other value.
case "$CU_VERSION" in
    cu130) NVRTC_SPEC="nvidia-cuda-nvrtc" ;;
    *)     NVRTC_SPEC="nvidia-cuda-nvrtc-cu12" ;;
esac
Cheng Wan's avatar
Cheng Wan committed
14

15
# Kill existing processes
# Resolve the directory holding this script so that the helper script one
# level above it can be invoked regardless of the caller's working directory.
this_script="${BASH_SOURCE[0]}"
SCRIPT_DIR="$(cd "$(dirname "${this_script}")" && pwd)"
bash "${SCRIPT_DIR}/../killall_sglang.sh"

# Log which GPUs are visible to this job (empty when the variable is unset).
printf '%s\n' "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
19

Lianmin Zheng's avatar
Lianmin Zheng committed
20
21
# Clear torch compilation cache
# The directory is $TORCHINDUCTOR_CACHE_DIR when that is set and non-empty,
# otherwise <system temp dir>/torchinductor_<current user>. Removal errors
# (including a missing directory) are ignored.
python3 - <<'PY'
import getpass
import os
import shutil
import tempfile

cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(
    tempfile.gettempdir(), "torchinductor_" + getpass.getuser()
)
shutil.rmtree(cache_dir, ignore_errors=True)
PY

# Drop the flashinfer cache directory as well.
rm -rf /root/.cache/flashinfer
Lianmin Zheng's avatar
Lianmin Zheng committed
23

24
25
26
# Install apt packages
# apt-get is used instead of apt because apt's command-line interface is not
# intended for scripts. The package index is refreshed first so the install
# does not fail on stale package URLs, and the frontend is forced to
# non-interactive so no package can block the CI job on a prompt.
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y git libnuma-dev

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Install protoc for router build (gRPC protobuf compilation)
# Skipped entirely when a protoc binary is already on PATH.
if ! command -v protoc &> /dev/null; then
    echo "Installing protoc..."
    # Tools used to fetch and unpack the release archive, plus a compiler
    # toolchain, installed through whichever package manager is present.
    if command -v apt-get &> /dev/null; then
        # Ubuntu/Debian
        apt-get update
        apt-get install -y wget unzip gcc g++ perl make
    elif command -v yum &> /dev/null; then
        # RHEL/CentOS
        yum update -y
        yum install -y wget unzip gcc gcc-c++ perl-core make
    fi

    protoc_version="32.0"
    # Pick the release asset for the host CPU; anything that is not a 64-bit
    # ARM host keeps the x86_64 asset this script has always downloaded.
    case "$(uname -m)" in
        aarch64 | arm64) protoc_arch="aarch_64" ;;
        *)               protoc_arch="x86_64" ;;
    esac
    protoc_zip="protoc-${protoc_version}-linux-${protoc_arch}.zip"

    # Download into a private temporary directory rather than a fixed name in
    # /tmp: a stale archive from an earlier run can then neither be picked up
    # nor make wget save under a different name, and the working directory of
    # the script is never changed.
    protoc_tmp_dir="$(mktemp -d)"
    wget -O "${protoc_tmp_dir}/${protoc_zip}" \
        "https://github.com/protocolbuffers/protobuf/releases/download/v${protoc_version}/${protoc_zip}"
    # -o overwrites files left by an earlier partial install instead of
    # stopping to ask, which would stall a non-interactive job.
    unzip -o "${protoc_tmp_dir}/${protoc_zip}" -d /usr/local
    rm -rf -- "${protoc_tmp_dir}"
    protoc --version
else
    echo "protoc already installed: $(protoc --version)"
fi

50
51
52
53
54
55
# Install uv
# Selects the installer front-end and installs the main package with it.
# PIP_CMD and PIP_INSTALL_SUFFIX are plain strings that are expanded unquoted
# on purpose so they split into separate words; later install steps in this
# script reuse both.
torch_whl_index="https://download.pytorch.org/whl/${CU_VERSION}"

if [ "$IS_BLACKWELL" != "1" ]; then
    # In normal cases, we use uv, which is much faster than pip.
    pip install --upgrade pip
    pip install uv
    export UV_SYSTEM_PYTHON=true

    PIP_CMD="uv pip"
    PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"

    # Clean up existing installations (best effort: a failure is ignored)
    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true

    # Install the main package without deps
    $PIP_CMD install -e "python[dev]" --no-deps $PIP_INSTALL_SUFFIX --force-reinstall

    # Install flashinfer-python 0.4.1 dependency that requires prerelease
    # (This should be removed when flashinfer fixes this issue)
    $PIP_CMD install flashinfer-python==0.4.1 --prerelease=allow $PIP_INSTALL_SUFFIX

    # Install the main package
    $PIP_CMD install -e "python[dev]" --extra-index-url "${torch_whl_index}" $PIP_INSTALL_SUFFIX --upgrade
else
    # The blackwell CI runner has some issues with pip and uv,
    # so we can only use pip with `--break-system-packages`
    PIP_CMD="pip"
    PIP_INSTALL_SUFFIX="--break-system-packages"

    # Clean up existing installations (best effort: a failure is ignored)
    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true

    # Install the main package
    $PIP_CMD install -e "python[dev]" --extra-index-url "${torch_whl_index}" $PIP_INSTALL_SUFFIX --force-reinstall
fi
Xiaoyu Zhang's avatar
Xiaoyu Zhang committed
83

84
85
86
# Install OpenSSL development libraries for router build
# apt-get replaces apt, whose command-line interface is not intended for
# scripts; the non-interactive frontend keeps a package prompt from stalling
# the job.
DEBIAN_FRONTEND=noninteractive apt-get install -y libssl-dev pkg-config

# Install router for pd-disagg test
# PIP_CMD / PIP_INSTALL_SUFFIX are expanded unquoted on purpose: each holds
# several space-separated words.
$PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
89

Lianmin Zheng's avatar
Lianmin Zheng committed
90
# Install sgl-kernel
# Two version strings are read from the checked-out tree:
#   SGL_KERNEL_VERSION_FROM_KERNEL  the `version = "..."` value in
#                                   sgl-kernel/pyproject.toml
#   SGL_KERNEL_VERSION_FROM_SRT     the version following `sgl-kernel==` in
#                                   python/pyproject.toml
# Both greps use -m1 so only the first matching line is taken; a second match
# can then never yield a multi-line value that corrupts the paths built below.
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po -m1 '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"

if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
    # Install the wheel found in sgl-kernel/dist; the listing is printed first
    # so the job log shows what was available.
    ls -alh sgl-kernel/dist
    $PIP_CMD install "sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl" --force-reinstall $PIP_INSTALL_SUFFIX
else
    # Install the version pinned in python/pyproject.toml from the package index.
    $PIP_CMD install "sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT}" --force-reinstall $PIP_INSTALL_SUFFIX
fi

102
# Show current packages
$PIP_CMD list

# Install the remaining Python dependencies. The extras requirement is quoted
# because an unquoted `[...]` is a glob bracket expression that the shell may
# expand against file names in the working directory.
$PIP_CMD install mooncake-transfer-engine==0.3.6.post1 "${NVRTC_SPEC}" py-spy scipy "huggingface_hub[hf_xet]" $PIP_INSTALL_SUFFIX
106

107
# Extra packages that are only installed when IS_BLACKWELL is not "1".
if [ "$IS_BLACKWELL" != "1" ]; then
    # For lmms_evals evaluating MMMU
    # Remove a checkout left behind by an earlier run first: git clone refuses
    # to clone into an existing non-empty directory, which would abort the
    # whole script.
    rm -rf lmms-eval
    git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
    $PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX

    # Install xformers
    # --no-deps: only the xformers package itself is installed from the
    # PyTorch wheel index for the selected CUDA flavour.
    $PIP_CMD install xformers --index-url "https://download.pytorch.org/whl/${CU_VERSION}" --no-deps $PIP_INSTALL_SUFFIX
fi

# Install dependencies for deepseek-v3.2
if [ "$RUN_DEEPSEEK_V32" = "1" ]; then
    # Install flashmla
    FLASHMLA_COMMIT="1408756a88e52a25196b759eaf8db89d2b51b5a1"
    # Passed to the FlashMLA build as an environment variable: "1" unless
    # IS_BLACKWELL is "1".
    FLASH_MLA_DISABLE_SM100="0"
    if [ "$IS_BLACKWELL" != "1" ]; then
        FLASH_MLA_DISABLE_SM100="1"
    fi

    # Start from a clean checkout: git clone fails on an existing non-empty
    # directory, which would abort the script on a re-run.
    rm -rf flash-mla
    git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla
    # Build inside a subshell so the working directory of the rest of the
    # script is unchanged even if a step fails.
    (
        cd flash-mla
        git checkout "${FLASHMLA_COMMIT}"
        git submodule update --init --recursive
        FLASH_MLA_DISABLE_SM100="${FLASH_MLA_DISABLE_SM100}" $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation
    )
fi
131
132

# Show current packages
# Final listing of what ended up installed, for the job log.
$PIP_CMD list

# Print the CUDA version reported by the installed torch.
python3 -c 'import torch; print(torch.version.cuda)'