Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
6ef3a011
Unverified
Commit
6ef3a011
authored
Dec 07, 2023
by
Ziyue Yang
Committed by
GitHub
Dec 07, 2023
Browse files
Benchmarks: Add MSCCL Support for Nvidia GPU (#584)
**Description** Add MSCCL support for Nvidia GPU
parent
dd5a6329
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
33 additions
and
3 deletions
+33
-3
.github/workflows/build-image.yml
.github/workflows/build-image.yml
+1
-1
.gitmodules
.gitmodules
+3
-0
dockerfile/cuda12.2.dockerfile
dockerfile/cuda12.2.dockerfile
+3
-1
third_party/Makefile
third_party/Makefile
+25
-1
third_party/msccl
third_party/msccl
+1
-0
No files found.
.github/workflows/build-image.yml
View file @
6ef3a011
...
@@ -54,7 +54,7 @@ jobs:
...
@@ -54,7 +54,7 @@ jobs:
-
name
:
Checkout
-
name
:
Checkout
uses
:
actions/checkout@v2
uses
:
actions/checkout@v2
with
:
with
:
submodules
:
tru
e
submodules
:
recursiv
e
-
name
:
Free disk space
-
name
:
Free disk space
run
:
|
run
:
|
mkdir /tmp/emptydir
mkdir /tmp/emptydir
...
...
.gitmodules
View file @
6ef3a011
...
@@ -21,3 +21,6 @@
...
@@ -21,3 +21,6 @@
[submodule "third_party/gpu-burn"]
[submodule "third_party/gpu-burn"]
path = third_party/gpu-burn
path = third_party/gpu-burn
url = https://github.com/wilicc/gpu-burn.git
url = https://github.com/wilicc/gpu-burn.git
[submodule "third_party/msccl"]
path = third_party/msccl
url = https://github.com/Azure/msccl
dockerfile/cuda12.2.dockerfile
View file @
6ef3a011
...
@@ -35,6 +35,7 @@ RUN apt-get update && \
...
@@ -35,6 +35,7 @@ RUN apt-get update && \
libavutil-dev
\
libavutil-dev
\
libboost-program-options-dev
\
libboost-program-options-dev
\
libcap2
\
libcap2
\
libcurl4-openssl-dev
\
libnuma-dev
\
libnuma-dev
\
libpci-dev
\
libpci-dev
\
libswresample-dev
\
libswresample-dev
\
...
@@ -43,6 +44,7 @@ RUN apt-get update && \
...
@@ -43,6 +44,7 @@ RUN apt-get update && \
lshw
\
lshw
\
python3-mpi4py
\
python3-mpi4py
\
net-tools
\
net-tools
\
nlohmann-json3-dev
\
openssh-client
\
openssh-client
\
openssh-server
\
openssh-server
\
pciutils
\
pciutils
\
...
@@ -129,7 +131,7 @@ ADD dockerfile/etc /opt/microsoft/
...
@@ -129,7 +131,7 @@ ADD dockerfile/etc /opt/microsoft/
WORKDIR
${SB_HOME}
WORKDIR
${SB_HOME}
ADD
third_party third_party
ADD
third_party third_party
RUN
make
-C
third_party cuda
RUN
make
-C
third_party cuda
_with_msccl
ADD
. .
ADD
. .
RUN
python3
-m
pip
install
--upgrade
setuptools
==
65.7
&&
\
RUN
python3
-m
pip
install
--upgrade
setuptools
==
65.7
&&
\
...
...
third_party/Makefile
View file @
6ef3a011
...
@@ -11,10 +11,11 @@ HPCX_HOME ?= /opt/hpcx
...
@@ -11,10 +11,11 @@ HPCX_HOME ?= /opt/hpcx
CUDA_VER
?=
$(
shell
nvcc
--version
|
grep
'release'
|
awk
'{print $$6
}
'
|
cut
-c2-
|
cut
-d
'.'
-f1-2
)
CUDA_VER
?=
$(
shell
nvcc
--version
|
grep
'release'
|
awk
'{print $$6
}
'
|
cut
-c2-
|
cut
-d
'.'
-f1-2
)
ROCBLAS_BRANCH
?=
rocm-
$(
shell
dpkg
-l
|
grep
'rocm-dev '
|
awk
'{print $$3
}
'
|
cut
-d
'.'
-f1-3
)
ROCBLAS_BRANCH
?=
rocm-
$(
shell
dpkg
-l
|
grep
'rocm-dev '
|
awk
'{print $$3
}
'
|
cut
-d
'.'
-f1-3
)
.PHONY
:
all cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed
.PHONY
:
all
cuda_with_msccl
cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest
cuda_msccl
rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed
# Build all targets.
# Build all targets.
all
:
cuda rocm
all
:
cuda rocm
cuda_with_msccl
:
cuda cuda_msccl
cuda
:
common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed
cuda
:
common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest gpcnet cuda_gpuburn megatron_lm megatron_deepspeed
rocm
:
common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed
rocm
:
common rocm_perftest rocm_rccl_tests rocm_rocblas rocm_bandwidthTest rocm_hipblaslt megatron_deepspeed
cpu
:
common cpu_perftest
cpu
:
common cpu_perftest
...
@@ -188,3 +189,26 @@ megatron_deepspeed:
...
@@ -188,3 +189,26 @@ megatron_deepspeed:
cd
Megatron
&&
\
cd
Megatron
&&
\
python
-m
pip
install
-r
requirements.txt
&&
\
python
-m
pip
install
-r
requirements.txt
&&
\
python
-m
pip
install
DeepSpeed
python
-m
pip
install
DeepSpeed
# Build MSCCL for CUDA.
#
# Builds and installs up to three components from the msccl/ submodule tree:
#   1. msccl-executor-nccl -> $(SB_MICRO_PATH)/lib/msccl-executor-nccl
#   2. msccl-scheduler     -> $(SB_MICRO_PATH)/lib/msccl-scheduler
#   3. msccl-tests-nccl    -> $(SB_MICRO_PATH)/bin/msccl-tests-nccl
#
# Each section is wrapped in an ifneq/$(wildcard ...) guard on that component's
# Makefile, so a component whose Makefile is not present when this file is
# parsed contributes no recipe lines at all.
#
# The scheduler and the tests are pointed at the *installed* executor
# (BIN_HOME / NCCL_HOME), so the sections must stay in this order.
#
# Every recipe line runs in its own shell, so each `cd` only affects the
# command chained to it with `&&`; the install steps always run from the
# directory containing this Makefile.
#
# Sub-builds are invoked through $(MAKE) so that flags such as -n and -k are
# handed down to the component Makefiles.
#
# NOTE(review): SB_MICRO_PATH and MPI_HOME are defined outside this block;
# presumably the sb_micro_path prerequisite prepares $(SB_MICRO_PATH) - confirm.
cuda_msccl: sb_micro_path
ifneq (,$(wildcard msccl/executor/msccl-executor-nccl/Makefile))
	cd ./msccl/executor/msccl-executor-nccl && \
	$(MAKE) -j4 src.build
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-executor-nccl && \
	cp -r -v ./msccl/executor/msccl-executor-nccl/build/* $(SB_MICRO_PATH)/lib/msccl-executor-nccl/
endif
ifneq (,$(wildcard msccl/scheduler/msccl-scheduler/Makefile))
	cd ./msccl/scheduler/msccl-scheduler && \
	CXX=nvcc BIN_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl SRC_HOME=../../../msccl/executor/msccl-executor-nccl $(MAKE) -j4
	mkdir -p $(SB_MICRO_PATH)/lib/msccl-scheduler && \
	cp -r -v ./msccl/scheduler/msccl-scheduler/build/* $(SB_MICRO_PATH)/lib/msccl-scheduler/
endif
ifneq (,$(wildcard msccl/tests/msccl-tests-nccl/Makefile))
	cd ./msccl/tests/msccl-tests-nccl && \
	$(MAKE) MPI=1 MPI_HOME=$(MPI_HOME) NCCL_HOME=$(SB_MICRO_PATH)/lib/msccl-executor-nccl -j4
	mkdir -p $(SB_MICRO_PATH)/bin/msccl-tests-nccl && \
	cp -r -v ./msccl/tests/msccl-tests-nccl/build/* $(SB_MICRO_PATH)/bin/msccl-tests-nccl/
endif
msccl
@
7d4beb8c
Subproject commit 7d4beb8c0ba5b6c534c524023e57fe0467dc591c
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment