#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Install vllm and wideEP kernels from a specific git reference

# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Default configuration. Each value can be overridden by the matching
# command-line option handled by the argument parser below.
EDITABLE=true
VLLM_REF="ba81acbdc1eec643ba815a76628ae3e4b2263b76"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16
INSTALLATION_DIR=/tmp
ARCH=$(uname -m)
DEEPGEMM_REF="03d0be3"
FLASHINF_REF="v0.2.8rc1"
TORCH_BACKEND="cu128"

# Convert x86_64 to amd64 (and aarch64 to arm64) for consistency with Docker ARG.
# Any other machine name reported by uname is left untouched.
case "$ARCH" in
    x86_64)
        ARCH="amd64"
        ;;
    aarch64)
        ARCH="arm64"
        ;;
esac

# Abort with a readable message when an option that takes a value is the last
# word on the command line. Without this check, reading "$2" under `set -u`
# fails with a bare "unbound variable" error.
# Arguments: the remaining command-line words, starting with the option name.
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Error: option $1 requires a value" >&2
        exit 1
    fi
}

# Parse the command-line options into the global configuration variables
# (EDITABLE, VLLM_REF, VLLM_GIT_URL, MAX_JOBS, ARCH, INSTALLATION_DIR,
# DEEPGEMM_REF, FLASHINF_REF, TORCH_BACKEND).
# Arguments: the script's command-line words.
# Exits 0 after printing help; exits 1 on an unknown option or missing value.
parse_args() {
    # Snapshot the defaults before any option overrides them, so that --help
    # always reports the real defaults instead of hand-copied values.
    local default_vllm_ref="${VLLM_REF-}"
    local default_vllm_git_url="${VLLM_GIT_URL-}"
    local default_max_jobs="${MAX_JOBS-}"
    local default_installation_dir="${INSTALLATION_DIR-}"
    local default_deepgemm_ref="${DEEPGEMM_REF-}"
    local default_flashinf_ref="${FLASHINF_REF-}"
    local default_torch_backend="${TORCH_BACKEND-}"

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --editable)
                EDITABLE=true
                shift
                ;;
            --no-editable)
                EDITABLE=false
                shift
                ;;
            --vllm-ref)
                require_value "$@"
                VLLM_REF="$2"
                shift 2
                ;;
            --vllm-git-url)
                require_value "$@"
                VLLM_GIT_URL="$2"
                shift 2
                ;;
            --max-jobs)
                require_value "$@"
                MAX_JOBS="$2"
                shift 2
                ;;
            --arch)
                require_value "$@"
                ARCH="$2"
                shift 2
                ;;
            --installation-dir)
                require_value "$@"
                INSTALLATION_DIR="$2"
                shift 2
                ;;
            --deepgemm-ref)
                require_value "$@"
                DEEPGEMM_REF="$2"
                shift 2
                ;;
            --flashinf-ref)
                require_value "$@"
                FLASHINF_REF="$2"
                shift 2
                ;;
            --torch-backend)
                require_value "$@"
                TORCH_BACKEND="$2"
                shift 2
                ;;
            -h|--help)
                echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--vllm-git-url URL] [--max-jobs NUM] [--arch ARCH] [--installation-dir DIR] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
                echo "Options:"
                echo "  --editable        Install vllm in editable mode (default)"
                echo "  --no-editable     Install vllm in non-editable mode"
                echo "  --vllm-ref REF    Git reference to checkout (default: ${default_vllm_ref})"
                echo "  --vllm-git-url URL  Git repository to clone vllm from (default: ${default_vllm_git_url})"
                echo "  --max-jobs NUM    Maximum number of parallel jobs (default: ${default_max_jobs})"
                echo "  --arch ARCH       Architecture (amd64|arm64, default: auto-detect)"
                echo "  --installation-dir DIR  Directory in which the vllm checkout is created (default: ${default_installation_dir})"
                echo "  --deepgemm-ref REF  Git reference for DeepGEMM (default: ${default_deepgemm_ref})"
                echo "  --flashinf-ref REF  Git reference for Flash Infer (default: ${default_flashinf_ref})"
                echo "  --torch-backend BACKEND  Torch backend to use (default: ${default_torch_backend})"
                exit 0
                ;;
            *)
                # Diagnostics belong on stderr so they are not mixed into captured output.
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Export the settings that child processes (the build commands below) read
# from their environment.
export MAX_JOBS="$MAX_JOBS"
export CUDA_HOME=/usr/local/cuda

# Print the full effective configuration so a log of the run records every
# value the installation steps below depend on.
echo "Installing vllm with the following configuration:"
echo "  EDITABLE: $EDITABLE"
echo "  VLLM_REF: $VLLM_REF"
echo "  VLLM_GIT_URL: $VLLM_GIT_URL"
echo "  MAX_JOBS: $MAX_JOBS"
echo "  ARCH: $ARCH"
echo "  INSTALLATION_DIR: $INSTALLATION_DIR"
echo "  DEEPGEMM_REF: $DEEPGEMM_REF"
echo "  FLASHINF_REF: $FLASHINF_REF"
echo "  TORCH_BACKEND: $TORCH_BACKEND"

# Install common dependencies
uv pip install pip cuda-python

# Install LMCache
# NOTE(review): lmcache is installed unpinned, so the resolved version can
# change between runs — confirm whether a pin is wanted.
uv pip install lmcache

# Create vllm directory and clone
mkdir -p "$INSTALLATION_DIR"
# Resolve to an absolute path: the script changes into nested directories and
# later returns to $INSTALLATION_DIR, which would fail for a relative path.
INSTALLATION_DIR=$(cd "$INSTALLATION_DIR" && pwd)
cd "$INSTALLATION_DIR"
# NOTE: git clone fails if "vllm" already exists here (e.g. on a re-run).
git clone "$VLLM_GIT_URL" vllm
cd vllm
git checkout "$VLLM_REF"

# Build and install vllm from the checked-out source tree. The pip target is
# either the plain source directory or "-e ." for an editable install.
vllm_install_target=(.)
if [[ "$EDITABLE" == "true" ]]; then
    vllm_install_target=(-e .)
fi

if [[ "$ARCH" == "arm64" ]]; then
    echo "Installing vllm for ARM64 architecture"

    # Install the pinned PyTorch nightly builds. There is deliberately no
    # fallback: if the pinned versions cannot be installed the script aborts.
    # NOTE: this path hard-codes the cu128 index and does not use TORCH_BACKEND.
    echo "Attempting to install pinned PyTorch nightly versions..."
    if ! uv pip install torch==2.8.0.dev20250613+cu128 torchaudio==2.8.0.dev20250616 torchvision==0.23.0.dev20250616 --index-url https://download.pytorch.org/whl/nightly/cu128; then
        echo "Pinned versions failed" >&2
        exit 1
        # Disabled fallback, kept for reference:
        # uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
    fi

    # Helper script shipped in the vllm checkout; presumably adjusts vllm's
    # requirements so the torch installed above is kept — TODO confirm.
    python use_existing_torch.py
    uv pip install -r requirements/build.txt

    # Build isolation is disabled so the build uses the packages installed above.
    MAX_JOBS="${MAX_JOBS}" uv pip install --no-build-isolation "${vllm_install_target[@]}" -v
else
    echo "Installing vllm for AMD64 architecture"
    # VLLM_USE_PRECOMPILED=1 is passed to the vllm build; presumably it makes
    # the build reuse precompiled binaries instead of compiling — TODO confirm.
    VLLM_USE_PRECOMPILED=1 uv pip install "${vllm_install_target[@]}" --torch-backend="$TORCH_BACKEND"
fi

# Install ep_kernels and DeepGEMM
# Runs from the vllm checkout (the current directory at this point).
echo "Installing ep_kernels and DeepGEMM"
cd tools/ep_kernels
TORCH_CUDA_ARCH_LIST="9.0;10.0" bash install_python_libraries.sh # These libraries aren't pinned.

# ep_kernels_workspace is expected to exist after the script above has run;
# presumably it is created by install_python_libraries.sh — TODO confirm.
cd ep_kernels_workspace
git clone https://github.com/deepseek-ai/DeepGEMM.git
cd DeepGEMM
git checkout "$DEEPGEMM_REF" # Pin Version

# Rewrite SSH-style submodule URLs to HTTPS so the submodules can be fetched
# without SSH credentials, then propagate the new URLs and fetch them.
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules
git submodule sync --recursive
git submodule update --init --recursive

# command for 03d0be3
python setup.py install

# new install command for post 03d0be3
# cat install.sh
# ./install.sh


# Install Flash Infer
if [[ "$ARCH" == "arm64" ]]; then
    # NOTE: on arm64 the published flashinfer-python package is installed
    # unpinned; FLASHINF_REF is not used on this path.
    uv pip install flashinfer-python
else
    cd "$INSTALLATION_DIR"
    git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
    cd flashinfer
    git checkout "$FLASHINF_REF"
    # The recursive clone checked out submodules for the default branch;
    # re-sync them so they match the commits recorded at the pinned ref.
    git submodule update --init --recursive
    uv pip install -v --no-build-isolation .
fi

echo "vllm installation completed successfully"