Unverified commit 433785fd, authored by Ziyue Yang, committed by GitHub
Browse files

Benchmarks: Add Feature - Add GDR-only nccl-tests for Nvidia machines (#299)

This commit adds GDR-only nccl-tests for Nvidia machines. It also bumps NCCL to v2.10.3-1 to achieve peak performance in this test.
parent 682b2c12
...@@ -103,6 +103,8 @@ jobs: ...@@ -103,6 +103,8 @@ jobs:
tags: ${{ steps.metadata.outputs.tags }} tags: ${{ steps.metadata.outputs.tags }}
cache-from: ${{ steps.metadata.outputs.cache_from }} cache-from: ${{ steps.metadata.outputs.cache_from }}
cache-to: ${{ steps.metadata.outputs.cache_to }} cache-to: ${{ steps.metadata.outputs.cache_to }}
build-args: |
NUM_MAKE_JOBS=8
labels: | labels: |
org.opencontainers.image.source=${{ github.event.repository.html_url }} org.opencontainers.image.source=${{ github.event.repository.html_url }}
org.opencontainers.image.created=${{ github.event.repository.pushed_at }} org.opencontainers.image.created=${{ github.event.repository.pushed_at }}
......
...@@ -7,7 +7,7 @@ FROM nvcr.io/nvidia/pytorch:20.12-py3 ...@@ -7,7 +7,7 @@ FROM nvcr.io/nvidia/pytorch:20.12-py3
# NVIDIA: # NVIDIA:
# - CUDA: 11.1.1 # - CUDA: 11.1.1
# - cuDNN: 8.0.5 # - cuDNN: 8.0.5
# - NCCL: bootstrap_tag # - NCCL: v2.10.3-1
# Mellanox: # Mellanox:
# - OFED: 5.2-2.2.3.0 # - OFED: 5.2-2.2.3.0
# - HPC-X: v2.8.3 # - HPC-X: v2.8.3
...@@ -46,6 +46,8 @@ RUN apt-get update && \ ...@@ -46,6 +46,8 @@ RUN apt-get update && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /opt/cmake-3.14.6-Linux-x86_64 rm -rf /var/lib/apt/lists/* /tmp/* /opt/cmake-3.14.6-Linux-x86_64
ARG NUM_MAKE_JOBS=
# Install Docker # Install Docker
ENV DOCKER_VERSION=20.10.8 ENV DOCKER_VERSION=20.10.8
RUN cd /tmp && \ RUN cd /tmp && \
...@@ -85,16 +87,16 @@ RUN cd /tmp && \ ...@@ -85,16 +87,16 @@ RUN cd /tmp && \
git reset --hard 7cccbc1 && \ git reset --hard 7cccbc1 && \
./autogen.sh && \ ./autogen.sh && \
./configure --prefix=/usr/local --with-cuda=/usr/local/cuda && \ ./configure --prefix=/usr/local --with-cuda=/usr/local/cuda && \
make -j && \ make -j ${NUM_MAKE_JOBS} && \
make install && \ make install && \
cd /tmp && \ cd /tmp && \
rm -rf nccl-rdma-sharp-plugins rm -rf nccl-rdma-sharp-plugins
# Install NCCL patch # Install NCCL patch
RUN cd /tmp && \ RUN cd /tmp && \
git clone -b bootstrap_tag https://github.com/NVIDIA/nccl.git && \ git clone -b v2.10.3-1 https://github.com/NVIDIA/nccl.git && \
cd nccl && \ cd nccl && \
make -j src.build && \ make -j ${NUM_MAKE_JOBS} src.build && \
make install && \ make install && \
cd /tmp && \ cd /tmp && \
rm -rf nccl rm -rf nccl
...@@ -117,7 +119,7 @@ ENV PATH="${PATH}" \ ...@@ -117,7 +119,7 @@ ENV PATH="${PATH}" \
WORKDIR ${SB_HOME} WORKDIR ${SB_HOME}
ADD third_party third_party ADD third_party third_party
RUN make -j -C third_party cuda RUN make -j ${NUM_MAKE_JOBS} -C third_party cuda
ADD . . ADD . .
RUN python3 -m pip install .[nvidia,torch,ort] && \ RUN python3 -m pip install .[nvidia,torch,ort] && \
......
...@@ -43,7 +43,7 @@ superbench: ...@@ -43,7 +43,7 @@ superbench:
<<: *default_local_mode <<: *default_local_mode
gemm-flops: gemm-flops:
<<: *default_local_mode <<: *default_local_mode
nccl-bw: nccl-bw:default:
enable: true enable: true
modes: modes:
- name: local - name: local
...@@ -51,6 +51,21 @@ superbench: ...@@ -51,6 +51,21 @@ superbench:
parallel: no parallel: no
parameters: parameters:
ngpus: 8 ngpus: 8
nccl-bw:gdr-only:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
env:
NCCL_IB_PCI_RELAXED_ORDERING: '1'
NCCL_NET_GDR_LEVEL: '5'
NCCL_P2P_DISABLE: '1'
NCCL_SHM_DISABLE: '1'
NCCL_MIN_NCHANNELS: '16'
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
ib-loopback: ib-loopback:
enable: true enable: true
modes: modes:
......
...@@ -39,7 +39,7 @@ superbench: ...@@ -39,7 +39,7 @@ superbench:
<<: *default_local_mode <<: *default_local_mode
gemm-flops: gemm-flops:
<<: *default_local_mode <<: *default_local_mode
nccl-bw: nccl-bw:default:
enable: true enable: true
modes: modes:
- name: local - name: local
...@@ -47,6 +47,21 @@ superbench: ...@@ -47,6 +47,21 @@ superbench:
parallel: no parallel: no
parameters: parameters:
ngpus: 8 ngpus: 8
nccl-bw:gdr-only:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
env:
NCCL_IB_PCI_RELAXED_ORDERING: '1'
NCCL_NET_GDR_LEVEL: '5'
NCCL_P2P_DISABLE: '1'
NCCL_SHM_DISABLE: '1'
NCCL_MIN_NCHANNELS: '16'
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
ib-loopback: ib-loopback:
enable: true enable: true
modes: modes:
......
...@@ -33,7 +33,7 @@ superbench: ...@@ -33,7 +33,7 @@ superbench:
model_action: model_action:
- train - train
benchmarks: benchmarks:
nccl-bw: nccl-bw:default:
enable: true enable: true
modes: modes:
- name: local - name: local
...@@ -41,6 +41,21 @@ superbench: ...@@ -41,6 +41,21 @@ superbench:
parallel: no parallel: no
parameters: parameters:
ngpus: 8 ngpus: 8
nccl-bw:gdr-only:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
env:
NCCL_IB_PCI_RELAXED_ORDERING: '1'
NCCL_NET_GDR_LEVEL: '5'
NCCL_P2P_DISABLE: '1'
NCCL_SHM_DISABLE: '1'
NCCL_MIN_NCHANNELS: '16'
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
ib-loopback: ib-loopback:
enable: true enable: true
modes: modes:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment