Dockerfile.dev-deepep 2.33 KB
Newer Older
fzyzcjy's avatar
fzyzcjy committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
FROM lmsysorg/sglang:dev

# CMake
# Install the compiler toolchain and download helper from apt, then install
# CMake 3.27.4 into /usr/local with Kitware's self-extracting installer.
# The apt package index and the installer script are removed in the same
# layer that created them so they do not end up stored in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libssl-dev \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \
    && chmod +x cmake-3.27.4-linux-x86_64.sh \
    && ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \
    && rm cmake-3.27.4-linux-x86_64.sh

# Python
# Install the distribution's Python 3 and pip, then expose the interpreter
# under the name `python`.
# `ln -sf` replaces an already existing /usr/bin/python instead of aborting
# the build with "File exists".
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python

# GDRCopy
# Build the GDRCopy v2.4.4 Debian packages from source and install them.
WORKDIR /tmp
# `--branch` also accepts a tag; the working tree ends up at v2.4.4.
RUN git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git

# Packages required for the GDRCopy package build. `apt-get update` runs in
# the same layer as the install so a stale, cached package index is never
# reused. The package index is not removed here because later steps in this
# file install further packages.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        check \
        debhelper \
        devscripts \
        dkms \
        fakeroot \
        libsubunit-dev \
        libsubunit0 \
        nvidia-dkms-535 \
        pkg-config

WORKDIR /tmp/gdrcopy/packages
# Produce the .deb files in this directory, using the CUDA toolkit at /usr/local/cuda.
RUN CUDA=/usr/local/cuda ./build-deb-packages.sh
# Install the generated packages one by one, driver package first.
RUN dpkg -i gdrdrv-dkms_*.deb \
    && dpkg -i libgdrapi_*.deb \
    && dpkg -i gdrcopy-tests_*.deb \
    && dpkg -i gdrcopy_*.deb

# Exported for later build steps and for containers started from this image.
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/

# IBGDA dependency
# Provide an unversioned libmlx5.so name that points at libmlx5.so.1, and
# install the libfabric development package.
# `apt-get update` runs in the same layer as the install so the step does not
# depend on a package index cached by an earlier layer; the index is removed
# again afterwards to keep the layer small.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so \
    && apt-get update \
    && apt-get install -y libfabric-dev \
    && rm -rf /var/lib/apt/lists/*

# DeepEP
# Fetch the DeepEP sources into /sgl-workspace/DeepEP.
# DEEPEP_REF optionally selects a branch, tag or commit to check out, e.g.
#   docker build --build-arg DEEPEP_REF=<ref> ...
# When it is left empty, the tip of the repository's default branch is used.
ARG DEEPEP_REF=""
WORKDIR /sgl-workspace
RUN git clone https://github.com/deepseek-ai/DeepEP.git \
    && if [ -n "${DEEPEP_REF}" ]; then git -C DeepEP checkout "${DEEPEP_REF}"; fi

# NVSHMEM
# Download the NVSHMEM 3.2.5-1 source archive, unpack it to
# /sgl-workspace/nvshmem, and delete the archive in the same layer so it is
# not stored in the image.
WORKDIR /sgl-workspace
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
    && tar -xf nvshmem_src_3.2.5-1.txz \
    && mv nvshmem_src nvshmem \
    && rm nvshmem_src_3.2.5-1.txz

# Apply the NVSHMEM patch that ships inside the DeepEP checkout.
WORKDIR /sgl-workspace/nvshmem
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch

# Configure with the NVSHMEM_* feature switches below (IBGDA and GDRCopy set
# to 1, the others to 0), then build and install into
# /sgl-workspace/nvshmem/install for CUDA architecture 90.
# The switches are passed only to the configure command. The build is limited
# to one job per available CPU instead of an unbounded `-j`.
ENV CUDA_HOME=/usr/local/cuda
RUN NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
    && make -C build install -j"$(nproc)"

# Install DeepEP from its source tree with pip.
# NVSHMEM_DIR is exported through ENV, so it is already visible to the RUN
# step below (and to containers started from the image); it does not need to
# be repeated on the command line. `--no-cache-dir` keeps pip's download/build
# cache out of the image layer.
WORKDIR /sgl-workspace/DeepEP
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
RUN pip install --no-cache-dir .

# Install the `just` command runner into /usr/local/bin via its install script.
# The step runs under bash with `-euxo pipefail` so that a failed download
# makes the build fail; without pipefail the exit status of `curl` on the
# left-hand side of the pipe is ignored and bash would happily run an empty
# script.
RUN ["/bin/bash", "-euxo", "pipefail", "-c", "curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin"]

# Set workspace
# Make /sgl-workspace the working directory for any instructions that follow
# and the default directory of containers started from this image.
WORKDIR /sgl-workspace