build.sh 11.9 KB
Newer Older
lijian6's avatar
lijian6 committed
1
#!/bin/bash
2
set -eux
lijian6's avatar
lijian6 committed
3

lijian6's avatar
lijian6 committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Export the location of ROCm's CMake package directory.
# NOTE(review): presumably consumed by find_package(amd_comgr) in CMake-based
# sub-builds -- confirm against the third-party build scripts.
export amd_comgr_DIR="${ROCM_PATH}/lib64/cmake"

# Clang resource directories that this script knows how to link against.
llvm15_path="${ROCM_PATH}/llvm/lib/clang/15.0.0"
llvm17_path="${ROCM_PATH}/llvm/lib/clang/17.0.0"
llvm18_path="${ROCM_PATH}/llvm/lib/clang/18"

# Candidates are probed oldest to newest and the last existing one wins, so
# the newest installed toolchain is selected. llvm_path is required by the
# link step (clang_rt builtins), therefore a missing toolchain is reported
# here instead of surfacing as an "unbound variable" after a full compile.
llvm_path=""
for llvm_candidate in "${llvm15_path}" "${llvm17_path}" "${llvm18_path}"; do
    if [ -d "${llvm_candidate}" ]; then
        echo "llvm version is ${llvm_candidate##*/}"
        llvm_path=${llvm_candidate}
    fi
done
if [ -z "${llvm_path}" ]; then
    echo "Error: no clang resource directory (15.0.0, 17.0.0 or 18) found under ${ROCM_PATH}/llvm/lib/clang" >&2
    exit 1
fi

# Absolute directory that contains this script; the build helpers cd back to it.
src_path=$(dirname "$(realpath "$0")")

lijian6's avatar
lijian6 committed
24
25
26
# Output directory for the compiled object files; -p makes the call a no-op
# when the directory is already present.
mkdir -p build_
lijian6's avatar
lijian6 committed
27

lijian6's avatar
lijian6 committed
28
29
# Ask python3's sysconfig for its C header directory ('include') and its
# platform-specific site-packages directory ('platlib'). Both are used further
# down to assemble the include and library search paths (torch is looked up
# under PYTHON_PLATLIB).
PYTHON_INCLUDE=$(python3 -c "from sysconfig import get_paths; print(get_paths()['include'])")
PYTHON_PLATLIB=$(python3 -c "from sysconfig import get_paths; print(get_paths()['platlib'])")
lijian6's avatar
lijian6 committed
30

lijian6's avatar
lijian6 committed
31
32
# Build switches and their defaults. Each one is turned ON by the matching
# command-line word handled in parse_build_args below.
USE_NVSHMEM=OFF           # "nvshmem" or "dushmem": use the duSHMEM backend
USE_ROCSHMEM=OFF          # "rocshmem": build and use the bundled rocSHMEM
BUILD_SHCA=OFF            # "BUILD_SHCA=ON": SHCA build config, links -lshca instead of -lmlx5
ROCM_DISABLE_CTX=OFF      # "ROCM_DISABLE_CTX=ON": adds -DROCM_DISABLE_CTX to the compile flags
ROCM_DISABLE_MULTIQP=OFF  # "ROCM_DISABLE_MULTIQP=ON": adds -DROCM_DISABLE_MULTIQP

# Print the supported invocations on stderr.
print_usage() {
    {
        echo "Usage: ./build.sh rocshmem [BUILD_SHCA=ON] [ROCM_DISABLE_CTX=ON] [ROCM_DISABLE_MULTIQP=ON]"
        echo "       ./build.sh nvshmem|dushmem [BUILD_SHCA=ON]"
    } >&2
}

# Parse the command-line words and update the build switches.
# Globals:   USE_ROCSHMEM, USE_NVSHMEM, BUILD_SHCA, ROCM_DISABLE_CTX,
#            ROCM_DISABLE_MULTIQP (written)
# Arguments: the script's positional parameters
# Exits:     with status 1 after printing the usage when a word is unknown
parse_build_args() {
    local arg
    for arg in "$@"; do
        case "$arg" in
            rocshmem)
                USE_ROCSHMEM=ON
                ;;
            nvshmem|dushmem)
                USE_NVSHMEM=ON
                ;;
            BUILD_SHCA=ON)
                BUILD_SHCA=ON
                ;;
            ROCM_DISABLE_CTX=ON)
                ROCM_DISABLE_CTX=ON
                ;;
            ROCM_DISABLE_MULTIQP=ON)
                ROCM_DISABLE_MULTIQP=ON
                ;;
            *)
                echo "Unknown argument: $arg" >&2
                print_usage
                exit 1
                ;;
        esac
    done
}

parse_build_args "$@"

lishen's avatar
lishen committed
61
#######################################
# Print the hipcc --offload-arch flags for this machine.
#
# The first GPU name reported by rocminfo is treated as the "current"
# architecture; rocm_agent_enumerator supplies the set of architectures
# present. Selection rules:
#   * enumerator list available, current known: the first two entries of the
#     de-duplicated, reverse-sorted list, plus the current architecture when
#     it is not one of those two;
#   * enumerator list available, current unknown: the first list entry;
#   * enumerator list empty, current known: the current architecture;
#   * neither source yields anything: failure.
# Architectures are compared as strings so names with a hexadecimal suffix
# (e.g. gfx90a) are handled; gfx000, the placeholder the enumerator reports
# for non-GPU agents, is never emitted.
#
# Outputs:  space-separated "--offload-arch=gfxNNN" flags on stdout
# Returns:  0 when at least one flag was printed, 1 otherwise
#######################################
detect_offload_arch() {
    local current_arch="" arch flags="" in_top=0
    local -a available=() selected=()

    # Name of the first GPU agent listed by rocminfo (empty when rocminfo is
    # missing or prints no gfx name).
    current_arch=$(rocminfo 2>/dev/null \
        | grep -E 'Name:.*gfx[0-9][0-9a-f]*' \
        | head -n1 \
        | grep -oE 'gfx[0-9][0-9a-f]*' \
        | head -n1) || current_arch=""

    # One line per agent comes back from the enumerator, so identical GPUs
    # repeat; -u collapses them, -r keeps the original descending string order.
    if command -v rocm_agent_enumerator >/dev/null 2>&1; then
        while IFS= read -r arch; do
            available+=("$arch")
        done < <(rocm_agent_enumerator 2>/dev/null \
            | grep -oE '^gfx[0-9][0-9a-f]*' \
            | grep -vxF 'gfx000' \
            | LC_ALL=C sort -ru)
    fi

    if [ "${#available[@]}" -eq 0 ]; then
        # Nothing enumerated: fall back to what rocminfo reported, if anything.
        if [ -z "$current_arch" ]; then
            return 1
        fi
        selected=("$current_arch")
    elif [ -z "$current_arch" ]; then
        selected=("${available[0]}")
    else
        selected=("${available[@]:0:2}")
        for arch in "${selected[@]}"; do
            if [ "$arch" = "$current_arch" ]; then
                in_top=1
            fi
        done
        # Always build for the GPU that is actually installed.
        if [ "$in_top" -eq 0 ]; then
            selected+=("$current_arch")
        fi
    fi

    for arch in "${selected[@]}"; do
        flags+="${flags:+ }--offload-arch=${arch}"
    done
    printf '%s\n' "$flags"
    return 0
}
# Resolve the GPU targets once. A failed detection used to terminate the
# script silently through `set -e`; report why before exiting instead.
if ! DETECTED_ARCH=$(detect_offload_arch); then
    echo "Error: could not detect a GPU architecture (rocminfo / rocm_agent_enumerator reported none)" >&2
    exit 1
fi
echo "Current $DETECTED_ARCH"

# Echo the effective build switches for the log.
echo "USE_NVSHMEM=$USE_NVSHMEM"
echo "USE_ROCSHMEM=$USE_ROCSHMEM"
echo "BUILD_SHCA=$BUILD_SHCA"
echo "ROCM_DISABLE_CTX=$ROCM_DISABLE_CTX"
echo "ROCM_DISABLE_MULTIQP=$ROCM_DISABLE_MULTIQP"
140

lijian6's avatar
lijian6 committed
141
142
143
144
# -------------------------- With rocSHMEM -------------------------- #
#######################################
# Build the bundled rocSHMEM checkout in third-party/rocshmem/build.
# Globals:  BUILD_SHCA (read) - "ON" checks out the pinned commit and runs
#                               the gda_shca config, otherwise gda_mlx5
#           src_path   (read) - directory to return to when done
# Returns:  0 on success; 1 when a directory cannot be entered or the build
#           script fails. The working directory is restored to $src_path
#           once the function has left it.
#######################################
build_rocshmem()
{
    local build_dir="build"
    local build_config="gda_mlx5"
    local success_msg="编译MLX rocshmem 成功"

    cd third-party/rocshmem/ || return 1
    git config --global --add safe.directory .
    if [ "$BUILD_SHCA" == "ON" ]; then
        # SHCA builds use a pinned rocSHMEM revision.
        git checkout 1aab2cf87fe602b6ad62d93b054025fd1afa57bc
        build_config="gda_shca"
        success_msg="编译SHCA rocshmem 成功"
    fi

    mkdir -p "$build_dir"
    cd "$build_dir" || {
        echo "错误: 无法进入构建目录 '$build_dir'" >&2
        cd "$src_path"
        return 1
    }
    echo "cd third-party/rocshmem/build"

    # Only announce success when the build script really succeeded.
    if ! bash "../scripts/build_configs/${build_config}"; then
        cd "$src_path"
        return 1
    fi
    echo "$success_msg"
    cd "$src_path"
}

if [ "$USE_ROCSHMEM" == "ON" ]; then
    # Fetch the rocSHMEM submodule on first use.
    if [ ! -d "third-party/rocshmem/src/" ]; then
        echo "download submodule..."
        git submodule update --init third-party/rocshmem
    fi

    mkdir -p third-party/rocshmem_install

    build_rocshmem
    SHMEM_INSTALL_PREFIX=$(pwd)/third-party/rocshmem_install
    # Compiler flags may be supplied through the environment; the defaults
    # below are only used when COMPILE_OPTIONS is unset or empty. The
    # single-quoted fragments keep their inner double quotes so the macros
    # expand to string literals.
    COMPILE_OPTIONS=${COMPILE_OPTIONS:= -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -O3 -fgpu-rdc -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=deep_ep_cpp -D_GLIBCXX_USE_CXX11_ABI=1 ${DETECTED_ARCH}  -std=c++17 -Wno-return-type}
    # Optional feature switches are prepended as preprocessor definitions.
    if [ "$ROCM_DISABLE_CTX" == "ON" ]; then
        COMPILE_OPTIONS="-DROCM_DISABLE_CTX $COMPILE_OPTIONS"
    fi
    if [ "$ROCM_DISABLE_MULTIQP" == "ON" ]; then
        COMPILE_OPTIONS="-DROCM_DISABLE_MULTIQP $COMPILE_OPTIONS"
    fi
    # Link statically against librocshmem.a from the install prefix (also
    # overridable through the environment).
    SHMEM_LINK_OPTIONS=${SHMEM_LINK_OPTIONS:="-Wl,-rpath,${SHMEM_INSTALL_PREFIX}/lib/ -l:librocshmem.a"}
fi
# -------------------------- rocSHMEM END -------------------------- #
# -------------------------- With duSHMEM -------------------------- #
lijian6's avatar
lijian6 committed
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#######################################
# Build and install the duSHMEM HIP sources from third-party/dushmem-hip.
# Globals:  ROCM_PATH  (read)    - ROCm installation root
#           src_path   (read)    - directory to return to when done
#           BUILD_JOBS (read)    - optional make parallelism, default 64
#           CMAKE_PREFIX_PATH, NVSHMEM_PREFIX (exported)
# Returns:  0 on success; 1 when the environment file, the build directory
#           or any build step fails. The working directory is restored to
#           $src_path once the function has left it.
#######################################
build_dushmem()
{
    local build_dir="build"

    cd third-party/dushmem-hip/ || return 1
    # "./" makes sure the project's own file is sourced, not one found on PATH.
    # shellcheck disable=SC1091
    source ./env.build.sh || {
        cd "$src_path"
        return 1
    }
    export CMAKE_PREFIX_PATH=${ROCM_PATH}/lib/cmake/amd_comgr:${ROCM_PATH}/lib64/cmake/amd_comgr:${CMAKE_PREFIX_PATH:-}
    export NVSHMEM_PREFIX=$src_path/third-party/dushmem_install

    mkdir -p "$build_dir"
    cd "$build_dir" || {
        echo "错误: 无法进入构建目录 '$build_dir'" >&2
        cd "$src_path"
        return 1
    }
    echo "cd third-party/dushmem-hip/build"

    # Only announce success when configure, build and install all succeeded.
    if ! { cmake ../ && make -j"${BUILD_JOBS:-64}" && make install; }; then
        cd "$src_path"
        return 1
    fi
    echo "编译dushmem-hip成功"
    cd "$src_path"
}
212
if [ "$USE_NVSHMEM" == "ON" ]; then
    # Building duSHMEM from the bundled sources is currently switched off
    # (kept below for reference); the copy under ${ROCM_PATH}/dushmem is used.
    # if [ ! -d "third-party/dushmem-hip/src/" ]; then
    #     echo "download submodule..."
    #     git submodule update --init third-party/dushmem-hip
    # fi

    # if [ ! -d "third-party/dushmem_install" ]; then
    #     mkdir -p third-party/dushmem_install
    # fi
    # build_dushmem
    # SHMEM_INSTALL_PREFIX=$(pwd)/third-party/dushmem_install
    SHMEM_INSTALL_PREFIX=${ROCM_PATH}/dushmem
    # Compiler flags may be supplied through the environment; the defaults
    # below are only used when COMPILE_OPTIONS is unset or empty.
    COMPILE_OPTIONS=${COMPILE_OPTIONS:= -fPIC -DFORCE_DUSHMEM_API -DHIP_ENABLE_WARP_SYNC_BUILTINS -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -O3 -fgpu-rdc -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1014"' -DTORCH_EXTENSION_NAME=deep_ep_cpp -D_GLIBCXX_USE_CXX11_ABI=1 ${DETECTED_ARCH} -std=c++17 -Wno-return-type}
    # Static device library plus the shared host library from the install prefix.
    SHMEM_LINK_OPTIONS="-Wl,-rpath,${SHMEM_INSTALL_PREFIX}/lib/ -l:libdushmem_device.a -ldushmem_host"
fi
lijian6's avatar
lijian6 committed
227
# -------------------------- duSHMEM END -------------------------- #
lishen's avatar
lishen committed
228

229
# Header search path passed to every hipcc compile. A value supplied through
# the environment wins; otherwise the default covers the project sources, the
# selected SHMEM install prefix, MPI, the torch headers found under the Python
# platlib directory, DTK and the Python C headers.
INCLUDE_PATHS=${INCLUDE_PATHS:=-Icsrc/ -I${SHMEM_INSTALL_PREFIX}/include/ -I/opt/mpi/include -I${PYTHON_PLATLIB}/torch/include -I${PYTHON_PLATLIB}/torch/include/torch/csrc/api/include -I${PYTHON_PLATLIB}/torch/include/TH -I${PYTHON_PLATLIB}/torch/include/THC -I${PYTHON_PLATLIB}/torch/include/THH -I/opt/dtk/include -I${PYTHON_INCLUDE}}
230

lishen's avatar
lishen committed
231
232
233
234
235
236
237
238
239
240
241
242
243
# Translation units to compile, as paths relative to the repository root.
declare -a SOURCES=(
    "csrc/kernels/runtime.cu"
    "csrc/kernels/layout.cu"
    "csrc/kernels/intranode.cu"
    "csrc/kernels/internode.cu"
    "csrc/kernels/internode_ll.cu"
    "csrc/deep_ep.cu"
)

# Object files produced from SOURCES; starts empty and is filled while compiling.
declare -a OBJECTS=()

lishen's avatar
lishen committed
244
245
246
# Rebuild policy. Everything is recompiled by default because changes in
# headers or in the SHMEM library are not tracked; export FORCE_REBUILD=false
# to fall back to the timestamp comparison below.
FORCE_REBUILD=${FORCE_REBUILD:-true}

# Compile every source file into build_/<name>.o (directory structure is
# flattened, so source base names must be unique).
for src in "${SOURCES[@]}"; do
    obj="build_/$(basename "${src%.cu}.o")"
    OBJECTS+=("$obj")

    # Recompile when forced, when the object is missing, or when the source
    # is newer than the object.
    if [[ "$FORCE_REBUILD" == true ]] || [[ ! -f "$obj" ]] || [[ "$src" -nt "$obj" ]]; then
        echo "Compiling $src -> $obj"
        # INCLUDE_PATHS and COMPILE_OPTIONS hold several flags each and must
        # be word-split, hence no quotes.
        # shellcheck disable=SC2086
        hipcc ${INCLUDE_PATHS} -c "$src" -o "$obj" ${COMPILE_OPTIONS}
    else
        echo "Skipping $src (up to date)"
    fi
done

# Link stage: produce the Python extension module deep_ep/deep_ep_cpp<EXT_SUFFIX>.
ext_suffix=$(python3 -c 'import sysconfig; print(sysconfig.get_config_var("EXT_SUFFIX"))')
OUTPUT="deep_ep/deep_ep_cpp$ext_suffix"

# Relink when forced, when the output is missing, or when any object file is
# newer than the output.
need_link=false
if [[ "$FORCE_REBUILD" == true ]] || [[ ! -f "$OUTPUT" ]]; then
    need_link=true
else
    for obj in "${OBJECTS[@]}"; do
        if [[ "$obj" -nt "$OUTPUT" ]]; then
            need_link=true
            break
        fi
    done
fi

if [[ "$need_link" == true ]]; then
    echo "Linking -> $OUTPUT"

    # The SHCA and MLX5 builds use the same link line except for the last
    # library on it.
    if [ "$BUILD_SHCA" == "ON" ]; then
        nic_provider_lib="-lshca"
    else
        nic_provider_lib="-lmlx5"
    fi

    # Argument order is significant for the linker and is kept exactly as it
    # was. DETECTED_ARCH and SHMEM_LINK_OPTIONS hold several flags each and
    # must be word-split, hence no quotes.
    # shellcheck disable=SC2086
    hipcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g \
        -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 \
        -shared -Wl,-O1 -Wl,-Bsymbolic-functions \
        "${OBJECTS[@]}" \
        "-L${SHMEM_INSTALL_PREFIX}/lib/" -L/opt/mpi/lib -L/opt/dtk/hip/lib -L/usr/lib/x86_64-linux-gnu \
        -lhipblaslt -lamdhip64 \
        -o "$OUTPUT" \
        -Wl,-rpath,/opt/dtk/lib -fgpu-rdc --hip-link ${DETECTED_ARCH} \
        -shared -Wl,-soname,"$(basename "$OUTPUT")" \
        -L"${llvm_path}/include/../lib/linux" -lclang_rt.builtins-x86_64 \
        /opt/dtk/hip/lib/libgalaxyhip.so \
        "${llvm_path}/lib/linux/libclang_rt.builtins-x86_64.a" \
        /opt/hyhal/lib/libhsa-runtime64.so \
        "-L${PYTHON_PLATLIB}/torch/lib" -L/opt/dtk/lib -L/opt/dtk/hip/lib -L/usr/local/lib \
        -lc10 -ltorch -ltorch_cpu -ltorch_python -lamdhip64 -lc10_hip -ltorch_hip \
        -lrocm-core -lrocm_smi64 \
        ${SHMEM_LINK_OPTIONS} \
        -fgpu-rdc --hip-link -lamdhip64 -lhsa-runtime64 \
        -l:libmpi.so -Wl,-rpath,/opt/mpi/lib/ -libverbs \
        "$nic_provider_lib"

    echo "Successfully built $OUTPUT"
else
    echo "Skipping linking ($OUTPUT is up to date)"
fi
lijian6's avatar
lijian6 committed
290
291
292
293

# Build the wheel(s). The same python3 that supplied the headers and the
# extension suffix for the compiled module is used here, so the wheel always
# matches the interpreter the extension was built for.
echo "Using Python: $(command -v python3)"
python3 --version

# Extra setup.py switch for SHCA builds; left empty otherwise.
shca_flag=""
if [ "$BUILD_SHCA" == "ON" ]; then
    shca_flag="--build_shca"
fi

# shca_flag is intentionally unquoted so that an empty value adds no argument.
# shellcheck disable=SC2086
if [ "$USE_NVSHMEM" == "ON" ]; then
    python3 setup.py bdist_wheel --shmem=nv ${shca_flag}
fi
# shellcheck disable=SC2086
if [ "$USE_ROCSHMEM" == "ON" ]; then
    python3 setup.py bdist_wheel --shmem=rocm ${shca_flag}
fi

echo "✅ Build complete:"
ls -lh dist/