You need to sign in or sign up before continuing.
Unverified Commit 17c01d84 authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Update cuda11.8 image to cuda12.1 based on nvcr23.03 (#513)

Update cuda11.8 image to cuda12.1 based on nvcr23.03 and related versions in the image:
* cuda 11.8 -> 12.1
* nccl 2.15.5 -> 2.17.1
* hpcx: 2.8 -> 2.14
* mlc: 3.9a -> 3.10
parent 5a2adddc
......@@ -24,9 +24,9 @@ jobs:
strategy:
matrix:
include:
- name: cuda11.8
dockerfile: cuda11.8
tags: superbench/main:cuda11.8
- name: cuda12.1
dockerfile: cuda12.1
tags: superbench/main:cuda12.1
- name: cuda11.1.1
dockerfile: cuda11.1.1
tags: superbench/main:cuda11.1.1,superbench/superbench:latest
......
FROM nvcr.io/nvidia/pytorch:22.12-py3
FROM nvcr.io/nvidia/pytorch:23.03-py3
# OS:
# - Ubuntu: 20.04
# - OpenMPI: 4.1.5a1
# - Docker Client: 20.10.8
# NVIDIA:
# - CUDA: 11.8.0
# - cuDNN: 8.7.0.84
# - NCCL: v2.15.5-1
# - CUDA: 12.1.0
# - cuDNN: 8.8.1.3
# - NCCL: v2.17.1-1
# Mellanox:
# - OFED: 5.2-2.2.3.0
# - HPC-X: v2.8.3
# - OFED: 5.2-2.2.3.0 # TODO
# - HPC-X: v2.14
# Intel:
# - mlc: v3.9a
# - mlc: v3.10
LABEL maintainer="SuperBench"
......@@ -71,37 +71,27 @@ RUN mkdir -p /root/.ssh && \
# Install OFED
ENV OFED_VERSION=5.2-2.2.3.0
RUN cd /tmp && \
wget -q http://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
wget -q https://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*
# Install HPC-X
ENV HPCX_VERSION=v2.14
RUN cd /opt && \
rm -rf hpcx && \
wget -q https://azhpcstor.blob.core.windows.net/azhpc-images-store/hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tbz && \
tar xf hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tbz && \
ln -s hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64 hpcx && \
rm hpcx-v2.8.3-gcc-MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tbz
wget -q https://content.mellanox.com/hpc/hpc-x/${HPCX_VERSION}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda12-gdrcopy2-nccl2.17-x86_64.tbz -O hpcx.tbz && \
tar xf hpcx.tbz && \
mv hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda12-gdrcopy2-nccl2.17-x86_64 hpcx && \
rm hpcx.tbz
# Install Intel MLC
RUN cd /tmp && \
wget -q https://downloadmirror.intel.com/736634/mlc_v3.9a.tgz -O mlc.tgz && \
wget -q https://downloadmirror.intel.com/763324/mlc_v3.10.tgz -O mlc.tgz && \
tar xzf mlc.tgz Linux/mlc && \
cp ./Linux/mlc /usr/local/bin/ && \
rm -rf ./Linux mlc.tgz
ENV PATH="${PATH}" \
LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" \
SB_HOME=/opt/superbench \
SB_MICRO_PATH=/opt/superbench \
ANSIBLE_DEPRECATION_WARNINGS=FALSE \
ANSIBLE_COLLECTIONS_PATH=/usr/share/ansible/collections
RUN echo PATH="$PATH" > /etc/environment && \
echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \
echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment
# Install AOCC compiler
RUN cd /tmp && \
wget https://download.amd.com/developer/eula/aocc-compiler/aocc-compiler-4.0.0_1_amd64.deb && \
......@@ -115,6 +105,18 @@ RUN cd /tmp && \
mv amd-blis /opt/AMD && \
rm -rf aocl-blis-linux-aocc-4.0.tar.gz
ENV PATH="${PATH}" \
LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" \
SB_HOME=/opt/superbench \
SB_MICRO_PATH=/opt/superbench \
ANSIBLE_DEPRECATION_WARNINGS=FALSE \
ANSIBLE_COLLECTIONS_PATH=/usr/share/ansible/collections
RUN echo PATH="$PATH" > /etc/environment && \
echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \
echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment
# Add config files
ADD dockerfile/etc /opt/microsoft/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment