Commit 76fee8b6 authored by Guolin Ke's avatar Guolin Ke
Browse files

update docker file

parent f24a5f70
...@@ -25,16 +25,16 @@ jobs: ...@@ -25,16 +25,16 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
- -
name: Build and push cu113 name: Build and push cu116
uses: docker/build-push-action@v3 uses: docker/build-push-action@v3
with: with:
context: ./docker/cu113/ context: ./docker/cu116/
push: true push: true
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.11.0-cuda11.3 tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6
- -
name: Build and push cu116 name: Build and push cu116 with rdma
uses: docker/build-push-action@v3 uses: docker/build-push-action@v3
with: with:
context: ./docker/cu116/ context: ./docker/rdma/
push: true push: true
tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6 tags: dptechnology/unicore:${{ github.ref_name }}-pytorch1.12.1-cuda11.6-rdma
...@@ -25,16 +25,16 @@ jobs: ...@@ -25,16 +25,16 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
- -
name: Build and push cu113 name: Build and push cu116
uses: docker/build-push-action@v3 uses: docker/build-push-action@v3
with: with:
context: ./docker/cu113/ context: ./docker/cu116/
push: true push: true
tags: dptechnology/unicore:latest-pytorch1.11.0-cuda11.3 tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6
- -
name: Build and push cu116 name: Build and push cu116 with rdma
uses: docker/build-push-action@v3 uses: docker/build-push-action@v3
with: with:
context: ./docker/cu116/ context: ./docker/rdma/
push: true push: true
tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6 tags: dptechnology/unicore:latest-pytorch1.12.1-cuda11.6-rdma
FROM nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04 FROM nvcr.io/nvidia/pytorch:22.04-py3
ENV LANG C.UTF-8
ENV OFED_VERSION=5.3-1.0.0.1
RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
rm -rf /var/lib/apt/lists/* \ rm -rf /var/lib/apt/lists/* \
...@@ -52,58 +49,24 @@ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \ ...@@ -52,58 +49,24 @@ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
debhelper \ debhelper \
nfs-common nfs-common
# ================================================================== RUN pip uninstall -y torch && \
# InfiniBand & RDMA pip uninstall -y torch && \
# ------------------------------------------------------------------ conda clean -ya
RUN cd /tmp && \
wget -q http://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64.tgz && \
MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu20.04-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \
rm -rf /tmp/MLNX_OFED_LINUX-${OFED_VERSION}*
RUN cd /tmp && \
mkdir -p /usr/local/nccl-rdma-sharp-plugins && \
DEBIAN_FRONTEND=noninteractive apt install -y zlib1g-dev && \
git clone --depth=1 https://github.com/Mellanox/nccl-rdma-sharp-plugins.git && \
cd nccl-rdma-sharp-plugins && \
./autogen.sh && \
./configure --prefix=/usr/local/nccl-rdma-sharp-plugins --with-cuda=/usr/local/cuda && \
make && \
make install
# ==================================================================
# python
# ------------------------------------------------------------------
# Set timezone
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
ENV PATH /usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
ENV PYTHON_VERSION=3.8
RUN wget -O ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh
ENV PATH /opt/conda/bin:$PATH
RUN conda install -y python=3.8 && conda clean -ya
RUN conda install -y scipy scikit-learn pyyaml tensorboard tensorboardX && \ RUN conda install -y scipy scikit-learn pyyaml tensorboard tensorboardX && \
conda clean -ya conda clean -ya
RUN ldconfig # RUN ldconfig
# ================================================================== # # ==================================================================
# pytorch # # pytorch
# ------------------------------------------------------------------ # # ------------------------------------------------------------------
ENV TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0" ENV TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0"
RUN conda install -y numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas && \ RUN conda install -y numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas && \
conda clean -ya conda clean -ya
RUN conda install pytorch=1.11.0 cudatoolkit=11.3 -c pytorch && \ RUN conda install pytorch=1.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge && \
conda clean -ya conda clean -ya
RUN cd /tmp && \ RUN cd /tmp && \
...@@ -114,13 +77,9 @@ RUN cd /tmp && \ ...@@ -114,13 +77,9 @@ RUN cd /tmp && \
RUN pip install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree RUN pip install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree
ENV LD_LIBRARY_PATH=/usr/local/nccl-rdma-sharp-plugins/lib:$LD_LIBRARY_PATH
ENV PATH=/usr/mpi/gcc/openmpi-4.1.0rc5/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/mpi/gcc/openmpi-4.1.0rc5/lib:$LD_LIBRARY_PATH
RUN ldconfig && \ RUN ldconfig && \
apt-get clean && \ apt-get clean && \
apt-get autoremove && \ apt-get autoremove && \
rm -rf /var/lib/apt/lists/* /tmp/* && \ rm -rf /var/lib/apt/lists/* /tmp/* && \
conda clean -ya conda clean -ya
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment