Commit 76fee8b6 authored by Guolin Ke
Browse files

update docker file

parent f24a5f70
......@@ -25,16 +25,16 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Build and push cu113
name: Build and push cu116
uses: docker/build-push-action@v3
with:
context: ./docker/cu113/
context: ./docker/cu116/
push: true
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.11.0-cuda11.3
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6
-
name: Build and push cu116
name: Build and push cu116 with rdma
uses: docker/build-push-action@v3
with:
context: ./docker/cu116/
context: ./docker/rdma/
push: true
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6-rdma
......@@ -25,16 +25,16 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Build and push cu113
name: Build and push cu116
uses: docker/build-push-action@v3
with:
context: ./docker/cu113/
context: ./docker/cu116/
push: true
tags: dptechnology/unicore:latest-pytorch1.11.0-cuda11.3
tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6
-
name: Build and push cu116
name: Build and push cu116 with rdma
uses: docker/build-push-action@v3
with:
context: ./docker/cu116/
context: ./docker/rdma/
push: true
tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6
tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6-rdma
FROM nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04
ENV LANG C.UTF-8
ENV OFED_VERSION=5.3-1.0.0.1
FROM nvcr.io/nvidia/pytorch:22.04-py3
RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
rm -rf /var/lib/apt/lists/* \
......@@ -52,58 +49,24 @@ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
debhelper \
nfs-common
# ==================================================================
# InfiniBand & RDMA
# ------------------------------------------------------------------
# Install Mellanox OFED for the release named by OFED_VERSION:
#   1. download the Ubuntu 20.04 x86_64 tarball into /tmp,
#   2. unpack it and run the bundled mlnxofedinstall with --user-space-only
#      and --without-fw-update,
#   3. delete the tarball and the unpacked tree (both match the trailing glob)
#      in this same RUN so they are not kept in the layer.
RUN cd /tmp && \
wget -q http://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*
# Build Mellanox's nccl-rdma-sharp-plugins from a shallow clone of the upstream
# repository and install them under /usr/local/nccl-rdma-sharp-plugins,
# compiling against the CUDA toolkit in /usr/local/cuda.
#
# - zlib1g-dev is installed first (presumably a build dependency of the
#   plugins -- confirm). apt-get is used instead of apt because apt's
#   command-line interface is not meant for scripts, and recommended packages
#   are skipped like in the other apt installs of this file.
# - The cloned source tree is removed at the end of this same RUN so the build
#   directory is not stored in the image layer.
RUN cd /tmp && \
    mkdir -p /usr/local/nccl-rdma-sharp-plugins && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends zlib1g-dev && \
    git clone --depth=1 https://github.com/Mellanox/nccl-rdma-sharp-plugins.git && \
    cd nccl-rdma-sharp-plugins && \
    ./autogen.sh && \
    ./configure --prefix=/usr/local/nccl-rdma-sharp-plugins --with-cuda=/usr/local/cuda && \
    make && \
    make install && \
    cd /tmp && \
    rm -rf /tmp/nccl-rdma-sharp-plugins
# ==================================================================
# python
# ------------------------------------------------------------------
# Set timezone: point /etc/localtime at the Asia/Shanghai zoneinfo file.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# Prepend the NVIDIA driver directories to PATH and the NVIDIA/CUDA library
# directories to LD_LIBRARY_PATH. Written in key=value form; the
# space-separated "ENV key value" form is legacy syntax.
ENV PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Python version installed into the conda environment below.
ENV PYTHON_VERSION=3.8

# Download the Miniconda3 installer, run it non-interactively (-b) with
# /opt/conda as the install prefix (-p), and delete the installer in the same
# layer.
RUN wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh

# Make the conda-provided executables take precedence on PATH.
ENV PATH=/opt/conda/bin:$PATH

# Install the interpreter version declared in PYTHON_VERSION (previously the
# value was hard-coded here a second time) and drop conda's package caches.
RUN conda install -y python=${PYTHON_VERSION} && conda clean -ya
# Uninstall the torch package with pip, then clear conda's caches.
# NOTE(review): the uninstall is issued twice -- presumably the environment can
# hold more than one installed torch distribution; confirm before collapsing
# the two calls into one.
RUN pip uninstall -y torch && \
pip uninstall -y torch && \
conda clean -ya
# Install scipy, scikit-learn, pyyaml, tensorboard and tensorboardX with conda,
# clearing conda's caches in the same layer.
RUN conda install -y scipy scikit-learn pyyaml tensorboard tensorboardX && \
conda clean -ya
# Refresh the dynamic linker cache.
RUN ldconfig
# RUN ldconfig
# ==================================================================
# pytorch
# ------------------------------------------------------------------
# # ==================================================================
# # pytorch
# # ------------------------------------------------------------------
# CUDA compute capabilities that PyTorch CUDA extensions are compiled for.
# Written in key=value form; the space-separated "ENV key value" form is
# legacy syntax. The resulting value is unchanged: 7.0;7.5;8.0
ENV TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0"

# Numerical, build and tooling packages installed with conda; conda's caches
# are cleared in the same layer.
RUN conda install -y numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas && \
    conda clean -ya
RUN conda install pytorch=1.11.0 cudatoolkit=11.3 -c pytorch && \
RUN conda install pytorch=1.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge && \
conda clean -ya
RUN cd /tmp && \
......@@ -114,13 +77,9 @@ RUN cd /tmp && \
# Additional Python packages installed with pip; --no-cache-dir keeps pip's
# download cache out of the layer. No versions are pinned here.
RUN pip install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree
# Put the nccl-rdma-sharp-plugins library directory first on LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH=/usr/local/nccl-rdma-sharp-plugins/lib:$LD_LIBRARY_PATH
# Expose the OpenMPI 4.1.0rc5 install under /usr/mpi/gcc: its bin directory on
# PATH and its lib directory on LD_LIBRARY_PATH.
# NOTE(review): presumably this OpenMPI tree is provided by the MLNX_OFED
# installation -- confirm.
ENV PATH=/usr/mpi/gcc/openmpi-4.1.0rc5/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/mpi/gcc/openmpi-4.1.0rc5/lib:$LD_LIBRARY_PATH
# Final housekeeping: refresh the dynamic linker cache, then remove apt's
# package caches and lists, packages that are no longer needed, everything
# under /tmp, and conda's caches.
#
# autoremove is given -y: without it apt-get asks for confirmation whenever it
# actually has packages to remove, and that prompt cannot be answered during a
# non-interactive image build.
#
# Note: deleting files here only affects this layer; files added by earlier
# layers still occupy space in the image.
RUN ldconfig && \
    apt-get clean && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/* /tmp/* && \
    conda clean -ya
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment