# Start from the verl base image # Dockerfile.base FROM verlai/verl:base-verl0.4-cu124-cudnn9.8-torch2.6-fa2.7.4 # Define environments ENV MAX_JOBS=32 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn ENV DEBIAN_FRONTEND=noninteractive ENV NODE_OPTIONS="" ENV PIP_ROOT_USER_ACTION=ignore ENV HF_HUB_ENABLE_HF_TRANSFER="1" # Install torch-2.6.0+cu124 + vllm-0.8.5.post1 # torch-2.6.0+cu124: cxx11abi=False # torch-2.6.0+cu126: cxx11abi=True # see https://github.com/flashinfer-ai/flashinfer/issues/911 RUN pip install --resume-retries 999 --no-cache-dir vllm==0.8.5.post1 # Install flashinfer-0.2.2.post1+cu126 (cxx11abi=True) # vllm-0.8.3 does not support flashinfer>=0.2.3 # see https://github.com/vllm-project/vllm/pull/15777 RUN aria2c --max-tries=9999 https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \ pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \ rm flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl # Fix packages RUN pip install --no-cache-dir "tensordict==0.6.2" "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \ "numpy<2.0.0" "pyarrow>=19.0.1" pandas \ ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile xgrammar \ pytest py-spy pyext pre-commit ruff RUN pip uninstall -y pynvml nvidia-ml-py && \ pip install --resume-retries 999 --no-cache-dir --upgrade "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1" RUN pip install --resume-retries 999 --no-cache-dir nvidia-cudnn-cu12==9.8.0.87 # Install TransformerEngine RUN export NVTE_FRAMEWORK=pytorch && pip3 install --resume-retries 999 --no-deps --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1 # Install Megatron-LM RUN pip3 install --no-deps --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2 # Fix for transformers 4.53.0 RUN pip3 install --no-cache-dir "transformers[hf_xet]<4.52.0" # Install mbridge RUN pip3 install --no-cache-dir mbridge