# Builds GDRCopy, NVSHMEM (with IBGDA support), DeepEP and the Mooncake
# transfer engine on top of a caller-supplied base image.
#
# BASE_IMAGE has no default and must be supplied with
#   docker build --build-arg BASE_IMAGE=<image> .
# NOTE(review): the steps below reference /usr/local/cuda, so the base image
# is assumed to ship a CUDA toolkit at that path - confirm against the images
# this file is built from.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Deps
# One package per line, sorted and de-duplicated so diffs stay readable.
# git and wget are listed explicitly because later steps invoke them; they are
# no-ops if the base image already provides them.
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# `ln -sf` keeps the step from failing when /usr/bin/python already exists.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ccache \
        check \
        cmake \
        curl \
        debhelper \
        devscripts \
        dkms \
        fakeroot \
        git \
        ibverbs-providers \
        infiniband-diags \
        kmod \
        libboost-all-dev \
        libcurl4-openssl-dev \
        libczmq-dev \
        libczmq4 \
        libfabric-dev \
        libgoogle-glog-dev \
        libgrpc++-dev \
        libgrpc-dev \
        libgtest-dev \
        libhiredis-dev \
        libibumad3 \
        libibverbs-dev \
        libibverbs1 \
        libjsoncpp-dev \
        libnl-3-200 \
        libnl-3-dev \
        libnl-route-3-200 \
        libnl-route-3-dev \
        libnuma-dev \
        libopenmpi-dev \
        libprotobuf-dev \
        libpython3-dev \
        librdmacm1 \
        libssl-dev \
        libsubunit-dev \
        libsubunit0 \
        libunwind-dev \
        netcat-openbsd \
        nvidia-dkms-535 \
        openssh-server \
        patchelf \
        perftest \
        pkg-config \
        protobuf-compiler-grpc \
        pybind11-dev \
        python3 \
        python3-pip \
        rdma-core \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python

# CMake
# Installs the upstream 3.27.4 binary release under /usr/local, alongside the
# distro cmake package installed above. The installer is removed in the same
# layer so it does not persist in the image.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \
    && chmod +x cmake-3.27.4-linux-x86_64.sh \
    && ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \
    && rm cmake-3.27.4-linux-x86_64.sh

ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/

# GDRCopy
# Builds the v2.4.4 .deb packages from source and installs them. The clone is
# deleted in the same layer once the packages are installed, so the build tree
# is not carried in the image.
RUN cd /tmp \
    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb \
    && dpkg -i libgdrapi_*.deb \
    && dpkg -i gdrcopy-tests_*.deb \
    && dpkg -i gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy

# IBGDA dependency
# Provides the unversioned libmlx5.so name as a symlink to libmlx5.so.1.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so

# DeepEP
# Cloned before NVSHMEM because the NVSHMEM step applies a patch shipped in
# DeepEP's third-party/ directory.
WORKDIR /sgl-workspace
RUN git clone https://github.com/deepseek-ai/DeepEP.git

# NVSHMEM
# Download, unpack and patch in a single layer so the source tarball can be
# removed without lingering in an earlier layer.
# NOTE(review): the header name in the sed expression was missing in the
# previous revision of this file (an empty `#include`, which cannot compile);
# <unistd.h> is restored here - confirm it matches the intended header.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
    && tar -xf nvshmem_src_3.2.5-1.txz \
    && rm nvshmem_src_3.2.5-1.txz \
    && mv nvshmem_src nvshmem \
    && cd /sgl-workspace/nvshmem \
    && git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \
    && sed -i '1i#include <unistd.h>' /sgl-workspace/nvshmem/examples/moe_shuffle.cu \
    && head -n 1 /sgl-workspace/nvshmem/examples/moe_shuffle.cu

# Compile NVSHMEM
# Only IBGDA and GDRCopy are switched on; the SHMEM, UCX, NCCL, MPI and PMIx
# integrations are disabled. Device code is generated for CUDA architecture 90
# only. Parallelism is bounded to the CPU count: a bare `make -j` places no
# limit on concurrent jobs and can exhaust memory on a CUDA build.
ENV CUDA_HOME=/usr/local/cuda
WORKDIR /sgl-workspace/nvshmem
RUN NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ \
        -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install \
        -DCMAKE_CUDA_ARCHITECTURES=90 \
    && cmake --build build --target install --parallel "$(nproc)"

# Install DeepEP from the cloned checkout. NVSHMEM_DIR is exported through ENV,
# so it is visible to this build step and remains set in the final image.
WORKDIR /sgl-workspace/DeepEP
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
RUN pip install --no-cache-dir --break-system-packages .

# Install mooncake transfer engine
RUN pip install --no-cache-dir --upgrade mooncake_transfer_engine --break-system-packages