# Apptainer/Singularity definition file for a ROCm image that carries verl
# together with a source-built vllm and the Python packages installed below.
Bootstrap: docker

# Alternative base image - Training: fsdp; Inference: vllm
# From: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
# Selected base image - Training: fsdp; Inference: vllm, sglang
From: lmsysorg/sglang:v0.4.5-rocm630

%environment
    # Applied when the container is run (not while it is being built).
    export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    export CFLAGS="-D__HIP_PLATFORM_AMD__"
    export CXXFLAGS="-D__HIP_PLATFORM_AMD__"

%post
    # %environment is only applied at runtime, so the same variables are
    # exported here to make them visible to the vllm source build below.
    export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    export CFLAGS="-D__HIP_PLATFORM_AMD__"
    export CXXFLAGS="-D__HIP_PLATFORM_AMD__"

    # Create source directory
    mkdir -p /opt/src

    # Uninstall the vllm shipped with the base image (if any) and rebuild
    # v0.6.3 from source, using one build job per available CPU.
    pip uninstall -y vllm
    cd /opt/src
    git clone -b v0.6.3 https://github.com/vllm-project/vllm.git
    cd vllm
    MAX_JOBS=$(nproc) python3 setup.py install
    cd /opt
    # The checkout is only needed for the build; drop it to keep the image small.
    rm -rf /opt/src/vllm

    # Install dependencies.
    # tensordict is installed with --no-deps so pip does not pull in (or
    # replace) any of its requirements.
    pip install "tensordict<0.6" --no-deps

    pip install accelerate \
        codetiming \
        datasets \
        dill \
        hydra-core \
        liger-kernel \
        numpy \
        pandas \
        peft \
        "pyarrow>=15.0.0" \
        pylatexenc \
        "ray[data,train,tune,serve]" \
        torchdata \
        transformers \
        wandb \
        orjson \
        pybind11

    # Clone and install verl from GitHub (editable install, no dependency
    # resolution).
    cd /opt
    git clone https://github.com/volcengine/verl.git
    cd verl
    # Uncomment to use a specific version
    # git checkout v0.3.0.post0
    pip install -e . --no-deps

    # Install torch_memory_saver
    pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps