Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
DeepEP
Commits
baa261b5
Commit
baa261b5
authored
Nov 24, 2025
by
lijian6
Browse files
Add compile shmem on DeepEP.
Signed-off-by:
lijian
<
lijian6@sugon.com
>
parent
a117adf8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
10 deletions
+68
-10
build.sh
build.sh
+67
-9
third-party/rocshmem
third-party/rocshmem
+1
-1
No files found.
build.sh
View file @
baa261b5
#!/bin/bash
#!/bin/bash
set
-eux
set
-eux
export
amd_comgr_DIR
=
${
ROCM_PATH
}
/lib64/cmake
llvm15_path
=
${
ROCM_PATH
}
/llvm/lib/clang/15.0.0
llvm17_path
=
${
ROCM_PATH
}
/llvm/lib/clang/17.0.0
llvm18_path
=
${
ROCM_PATH
}
/llvm/lib/clang/18
if
[
-d
"
${
llvm15_path
}
"
]
;
then
echo
"llvm version is 15.0.0"
llvm_path
=
${
llvm15_path
}
fi
if
[
-d
"
${
llvm17_path
}
"
]
;
then
echo
"llvm version is 17.0.0"
llvm_path
=
${
llvm17_path
}
fi
if
[
-d
"
${
llvm18_path
}
"
]
;
then
echo
"llvm version is 18"
llvm_path
=
${
llvm18_path
}
fi
if
[
!
-d
"third-party/rocshmem/src/"
]
;
then
echo
"download submodule..."
git submodule update
--recursive
--init
fi
src_path
=
$(
dirname
"
$(
realpath
$0
)
"
)
if
[
!
-d
"build_"
]
;
then
if
[
!
-d
"build_"
]
;
then
mkdir
-p
build_
mkdir
-p
build_
fi
fi
...
@@ -8,19 +33,52 @@ fi
...
@@ -8,19 +33,52 @@ fi
PYTHON_INCLUDE
=
$(
python3
-c
"from sysconfig import get_paths; print(get_paths()['include'])"
)
PYTHON_INCLUDE
=
$(
python3
-c
"from sysconfig import get_paths; print(get_paths()['include'])"
)
PYTHON_PLATLIB
=
$(
python3
-c
"from sysconfig import get_paths; print(get_paths()['platlib'])"
)
PYTHON_PLATLIB
=
$(
python3
-c
"from sysconfig import get_paths; print(get_paths()['platlib'])"
)
# --------------------------------------------------------------------- #
USE_NVSHMEM
=
${
USE_NVSHMEM
:
=OFF
}
USE_NVSHMEM
=
${
USE_NVSHMEM
:
=OFF
}
SHMEM_INSTALL_PREFIX
=
${
SHMEM_INSTALL_PREFIX
:
=
$(
pwd
)
/rocshmem_dir
}
USE_ROCSHMEM
=
${
USE_ROCSHMEM
:
=ON
}
COMPILE_OPTIONS
=
${
COMPILE_OPTIONS
:
= -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -O3 -fgpu-rdc -DTORCH_API_INCLUDE_EXTENSION_H
'-DPYBIND11_COMPILER_TYPE="_gcc"'
'-DPYBIND11_STDLIB="_libstdcpp"'
'-DPYBIND11_BUILD_ABI="_cxxabi1014"'
-DTORCH_EXTENSION_NAME=deep_ep_cpp -D_GLIBCXX_USE_CXX11_ABI=1 --offload-arch=gfx936 -std=c++17 -Wno-return-type
}
SHMEM_LINK_OPTIONS
=
${
SHMEM_LINK_OPTIONS
:
=
"-Wl,-rpath,
${
SHMEM_INSTALL_PREFIX
}
/lib/ -l:librocshmem.a"
}
####
# -------------------------- With rocSHMEM -------------------------- #
# Build the bundled rocSHMEM when the USE_ROCSHMEM environment variable is ON
#######################################
# Build the bundled rocSHMEM submodule using its gda_mlx5 build configuration.
# The build runs inside third-party/rocshmem/build (created if missing) and the
# working directory is switched back to ${src_path} before returning.
# Globals:
#   src_path (read) - directory to return to once the build finished or failed
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, the "cannot enter build directory" error on
#   stderr.
# Returns:
#   0 when the build script succeeded, 1 when the build directory cannot be
#   entered, otherwise the exit status of the build script.
#######################################
build_rocshmem() {
  # The original error message expanded an undefined $build_dir, which aborts
  # the whole script under `set -u`; define it here so the message is usable.
  local build_dir="third-party/rocshmem/build"
  local status=0

  mkdir -p "${build_dir}"
  cd "${build_dir}" || {
    echo "错误: 无法进入构建目录 '${build_dir}'" >&2
    cd "${src_path}"
    return 1
  }
  echo "cd third-party/rocshmem/build"

  # Capture the status instead of relying on `set -e`, so that success is
  # never reported for a failed build and the caller's directory is restored.
  ../scripts/build_configs/gda_mlx5 || status=$?
  cd "${src_path}"
  if [ "${status}" -ne 0 ]; then
    return "${status}"
  fi
  echo "编译rocshmem成功"
}
if
[
"
$USE_ROCSHMEM
"
==
"ON"
]
;
then
if
[
!
-d
"rocshmem_dir"
]
;
then
mkdir
-p
rocshmem_dir
fi
build_rocshmem
SHMEM_INSTALL_PREFIX
=
$(
pwd
)
/rocshmem_dir
COMPILE_OPTIONS
=
${
COMPILE_OPTIONS
:
= -fPIC -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -O3 -fgpu-rdc -DTORCH_API_INCLUDE_EXTENSION_H
'-DPYBIND11_COMPILER_TYPE="_gcc"'
'-DPYBIND11_STDLIB="_libstdcpp"'
'-DPYBIND11_BUILD_ABI="_cxxabi1014"'
-DTORCH_EXTENSION_NAME=deep_ep_cpp -D_GLIBCXX_USE_CXX11_ABI=1 --offload-arch=gfx936 -std=c++17 -Wno-return-type
}
SHMEM_LINK_OPTIONS
=
${
SHMEM_LINK_OPTIONS
:
=
"-Wl,-rpath,
${
SHMEM_INSTALL_PREFIX
}
/lib/ -l:librocshmem.a"
}
fi
# -------------------------- rocSHMEM END -------------------------- #
# -------------------------- With duSHMEM -------------------------- #
# build_dushmem() # TODO
# {
#
# }
if
[
"
$USE_NVSHMEM
"
==
"ON"
]
;
then
if
[
"
$USE_NVSHMEM
"
==
"ON"
]
;
then
COMPILE_OPTIONS+
=
" -DFORCE_NVSHMEM_API -DHIP_ENABLE_WARP_SYNC_BUILTINS"
if
[
!
-d
"dushmem_dir"
]
;
then
SHMEM_INSTALL_PREFIX
=
???/dushmem_dir
mkdir
-p
dushmem_dir
fi
# build_dushmem() #TODO
COMPILE_OPTIONS
=
${
COMPILE_OPTIONS
:
= -fPIC -DFORCE_NVSHMEM_API -D__HIP_PLATFORM_AMD__=1 -DUSE_ROCM=1 -DHIPBLAS_V2 -DCUDA_HAS_FP16=1 -O3 -fgpu-rdc -DTORCH_API_INCLUDE_EXTENSION_H
'-DPYBIND11_COMPILER_TYPE="_gcc"'
'-DPYBIND11_STDLIB="_libstdcpp"'
'-DPYBIND11_BUILD_ABI="_cxxabi1014"'
-DTORCH_EXTENSION_NAME=deep_ep_cpp -D_GLIBCXX_USE_CXX11_ABI=1 --offload-arch=gfx936 -std=c++17 -Wno-return-type
}
SHMEM_INSTALL_PREFIX
=
${
SHMEM_INSTALL_PREFIX
:
=
$(
pwd
)
/dushmem_dir
}
SHMEM_LINK_OPTIONS
=
"-Wl,-rpath,
${
SHMEM_INSTALL_PREFIX
}
/lib/ -l:libnvshmem_device.a -lnvshmem_host"
SHMEM_LINK_OPTIONS
=
"-Wl,-rpath,
${
SHMEM_INSTALL_PREFIX
}
/lib/ -l:libnvshmem_device.a -lnvshmem_host"
fi
fi
# -------------------------- duSHMEM END -------------------------- #
INCLUDE_PATHS
=
${
INCLUDE_PATHS
:
=-Icsrc/ -I
${
SHMEM_INSTALL_PREFIX
}
/include/ -I/opt/mpi/include -I
${
PYTHON_PLATLIB
}
/torch/include -I
${
PYTHON_PLATLIB
}
/torch/include/torch/csrc/api/include -I
${
PYTHON_PLATLIB
}
/torch/include/TH -I
${
PYTHON_PLATLIB
}
/torch/include/THC -I
${
PYTHON_PLATLIB
}
/torch/include/THH -I/opt/dtk/include -I
${
PYTHON_INCLUDE
}}
INCLUDE_PATHS
=
${
INCLUDE_PATHS
:
=-Icsrc/ -I
${
SHMEM_INSTALL_PREFIX
}
/include/ -I/opt/mpi/include -I
${
PYTHON_PLATLIB
}
/torch/include -I
${
PYTHON_PLATLIB
}
/torch/include/torch/csrc/api/include -I
${
PYTHON_PLATLIB
}
/torch/include/TH -I
${
PYTHON_PLATLIB
}
/torch/include/THC -I
${
PYTHON_PLATLIB
}
/torch/include/THH -I/opt/dtk/include -I
${
PYTHON_INCLUDE
}}
...
@@ -31,7 +89,7 @@ hipcc ${INCLUDE_PATHS} -c $(pwd)/csrc/kernels/internode.cu -o build_/internode.o
...
@@ -31,7 +89,7 @@ hipcc ${INCLUDE_PATHS} -c $(pwd)/csrc/kernels/internode.cu -o build_/internode.o
hipcc
${
INCLUDE_PATHS
}
-c
$(
pwd
)
/csrc/kernels/internode_ll.cu
-o
build_/internode_ll.o
${
COMPILE_OPTIONS
}
hipcc
${
INCLUDE_PATHS
}
-c
$(
pwd
)
/csrc/kernels/internode_ll.cu
-o
build_/internode_ll.o
${
COMPILE_OPTIONS
}
hipcc
${
INCLUDE_PATHS
}
-c
$(
pwd
)
/csrc/deep_ep.cu
-o
build_/deep_ep.o
${
COMPILE_OPTIONS
}
hipcc
${
INCLUDE_PATHS
}
-c
$(
pwd
)
/csrc/deep_ep.cu
-o
build_/deep_ep.o
${
COMPILE_OPTIONS
}
hipcc
-Wno-unused-result
-Wsign-compare
-DNDEBUG
-g
-fwrapv
-O2
-Wall
-g
-fstack-protector-strong
-Wformat
-Werror
=
format-security
-g
-fwrapv
-O2
-shared
-Wl
,-O1
-Wl
,-Bsymbolic-functions build_/internode.o build_/intranode.o build_/runtime.o build_/deep_ep.o build_/layout.o build_/internode_ll.o
-L
${
SHMEM_INSTALL_PREFIX
}
/lib/
-L
/opt/mpi/lib
-L
/opt/dtk/hip/lib
-L
/usr/lib/x86_64-linux-gnu
-lhipblaslt
-lamdhip64
-o
deep_ep/deep_ep_cpp.cpython-310-x86_64-linux-gnu.so
-Wl
,-rpath,/opt/dtk/lib
-fgpu-rdc
--hip-link
--offload-arch
=
gfx936
-shared
-Wl
,-soname,deep_ep/deep_ep_cpp.cpython-310-x86_64-linux-gnu.so
-L
"
/opt/dtk/llvm/lib/clang/15.0.0
/include/../lib/linux"
-lclang_rt
.builtins-x86_64 /opt/dtk/hip/lib/libgalaxyhip.so
/opt/dtk/llvm/lib/clang/15.0.0
/lib/linux/libclang_rt.builtins-x86_64.a /opt/hyhal/lib/libhsa-runtime64.so.1.11.0
-L
${
PYTHON_PLATLIB
}
/torch/lib
-L
/opt/dtk/lib
-L
/opt/dtk/hip/lib
-L
/usr/local/lib
-lc10
-ltorch
-ltorch_cpu
-ltorch_python
-lamdhip64
-lc10_hip
-ltorch_hip
-lrocm-core
-lrocm_smi64
${
SHMEM_LINK_OPTIONS
}
-fgpu-rdc
--hip-link
-lamdhip64
-lhsa-runtime64
-l
:libmpi.so
-Wl
,-rpath,/opt/mpi/lib/
-libverbs
-lmlx5
hipcc
-Wno-unused-result
-Wsign-compare
-DNDEBUG
-g
-fwrapv
-O2
-Wall
-g
-fstack-protector-strong
-Wformat
-Werror
=
format-security
-g
-fwrapv
-O2
-shared
-Wl
,-O1
-Wl
,-Bsymbolic-functions build_/internode.o build_/intranode.o build_/runtime.o build_/deep_ep.o build_/layout.o build_/internode_ll.o
-L
${
SHMEM_INSTALL_PREFIX
}
/lib/
-L
/opt/mpi/lib
-L
/opt/dtk/hip/lib
-L
/usr/lib/x86_64-linux-gnu
-lhipblaslt
-lamdhip64
-o
deep_ep/deep_ep_cpp.cpython-310-x86_64-linux-gnu.so
-Wl
,-rpath,/opt/dtk/lib
-fgpu-rdc
--hip-link
--offload-arch
=
gfx936
-shared
-Wl
,-soname,deep_ep/deep_ep_cpp.cpython-310-x86_64-linux-gnu.so
-L
"
${
llvm_path
}
/include/../lib/linux"
-lclang_rt
.builtins-x86_64 /opt/dtk/hip/lib/libgalaxyhip.so
${
llvm_path
}
/lib/linux/libclang_rt.builtins-x86_64.a /opt/hyhal/lib/libhsa-runtime64.so.1.11.0
-L
${
PYTHON_PLATLIB
}
/torch/lib
-L
/opt/dtk/lib
-L
/opt/dtk/hip/lib
-L
/usr/local/lib
-lc10
-ltorch
-ltorch_cpu
-ltorch_python
-lamdhip64
-lc10_hip
-ltorch_hip
-lrocm-core
-lrocm_smi64
${
SHMEM_LINK_OPTIONS
}
-fgpu-rdc
--hip-link
-lamdhip64
-lhsa-runtime64
-l
:libmpi.so
-Wl
,-rpath,/opt/mpi/lib/
-libverbs
-lmlx5
# build whl
# build whl
echo
"Using Python:
$(
which python3
)
"
echo
"Using Python:
$(
which python3
)
"
...
...
rocshmem
@
8e95de40
Subproject commit
f5a87af2671b6daaea16ae766ca97db867ef996c
Subproject commit
8e95de40f7209db648c84aa47bbfdea7a4727386
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment