FROM hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2

# Build-time environment variables
# MAX_JOBS caps the number of parallel compile jobs for the source builds below (Apex, TransformerEngine)
ENV MAX_JOBS=64

# aria2 downloads the CUDA installer; wget fetches the FlashAttention wheel below
RUN apt-get update && \
    apt-get install -y aria2 wget

# 1. Reinstall CUDA 12.4
RUN aria2c https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
    mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600

RUN aria2c --always-resume=true --max-tries=99999 https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb

# Register the local repo, install the toolkit, then drop the installer to keep the layer small
RUN dpkg -i cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb && \
    cp /var/cuda-repo-ubuntu2204-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cuda-toolkit-12-4 && \
    rm cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb && \
    update-alternatives --set cuda /usr/local/cuda-12.4
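
# Optional sanity check (not part of the original build): confirm the toolkit now reports 12.4
# RUN /usr/local/cuda-12.4/bin/nvcc --version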

# 2. Reinstall FlashAttention 2.7.3 (prebuilt wheel for CUDA 12 / torch 2.6 / cp310)
RUN pip uninstall -y flash-attn && \
    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    rm flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
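
# Optional sanity check (commented out; not in the original build):
# RUN python -c "import flash_attn; print(flash_attn.__version__)"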

# 3. Install Apex with C++ and CUDA extensions, then remove the build checkout
RUN git clone https://github.com/NVIDIA/apex.git && \
    cd apex && \
    pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ && \
    cd .. && \
    rm -rf apex
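
# Optional sanity check (commented out; not in the original build):
# RUN python -c "import apex"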

# 4. Install TransformerEngine (PyTorch bindings only; --no-deps leaves the preinstalled torch stack untouched)
RUN NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.0
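
# Optional sanity check (commented out; not in the original build):
# RUN python -c "import transformer_engine.pytorch"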

# 5. Install Megatron-LM
RUN pip3 install git+https://github.com/NVIDIA/Megatron-LM.git@v0.11.0
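
# Optional sanity check (commented out; not in the original build):
# RUN python -c "import megatron.core"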