Dockerfile.app.sglang.vllm.mcore0.12 1.88 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Start from the verl base image
# Dockerfile.base
FROM verlai/verl:base-verl0.4-cu124-cudnn9.8-torch2.6-fa2.7.4

# Define environments
ENV MAX_JOBS=32
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV PIP_ROOT_USER_ACTION=ignore
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Install sglang-0.4.6.post5 and torch-memory-saver
RUN pip install --resume-retries 999 "sglang[all]==0.4.6.post5" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir

# Some sglang operations in 0.4.6.post5 require vllm
# [Warning] vllm can have some packages not compatible with sglang, for example, flashinfer
RUN pip install --resume-retries 999 --no-cache-dir vllm==0.8.5.post1

# Fix packages
RUN pip install --no-cache-dir "tensordict==0.6.2" "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
    "numpy<2.0.0" "pyarrow>=19.0.1" pandas \
    ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile xgrammar \
    pytest py-spy pyext pre-commit ruff

RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --resume-retries 999 --no-cache-dir --upgrade "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1"

RUN pip install --resume-retries 999 --no-cache-dir nvidia-cudnn-cu12==9.8.0.87

# Install TransformerEngine
RUN export NVTE_FRAMEWORK=pytorch && pip3 install --resume-retries 999 --no-deps --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1

# Install Megatron-LM
RUN pip3 install --no-deps --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2

# Fix for transformers 4.53.0
RUN pip3 install --no-cache-dir "transformers[hf_xet]<4.52.0"

# Install mbridge
RUN pip3 install --no-cache-dir mbridge