Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
906dbc34
Unverified
Commit
906dbc34
authored
Jun 20, 2025
by
ybyang
Committed by
GitHub
Jun 19, 2025
Browse files
[Docker] optimize dockerfile remove deepep and blackwell merge it to… (#7343)
Co-authored-by:
Yineng Zhang
<
me@zhyncs.com
>
parent
fadf18fd
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
104 additions
and
458 deletions
+104
-458
.github/workflows/release-docker-blackwell.yml
.github/workflows/release-docker-blackwell.yml
+0
-36
.github/workflows/release-docker-deepep.yml
.github/workflows/release-docker-deepep.yml
+0
-47
.github/workflows/release-docker.yml
.github/workflows/release-docker.yml
+15
-4
docker/Dockerfile
docker/Dockerfile
+89
-42
docker/Dockerfile.blackwell
docker/Dockerfile.blackwell
+0
-215
docker/Dockerfile.deepep
docker/Dockerfile.deepep
+0
-114
No files found.
.github/workflows/release-docker-blackwell.yml
deleted
100644 → 0
View file @
fadf18fd
name
:
Build Blackwell Docker Image
on
:
workflow_dispatch
:
schedule
:
-
cron
:
'
0
0
*
*
*'
jobs
:
build-dev
:
if
:
${{ github.repository == 'sgl-project/sglang' }}
runs-on
:
ubuntu-22.04
steps
:
-
name
:
Checkout repository
uses
:
actions/checkout@v4
-
name
:
Free disk space
uses
:
jlumbroso/free-disk-space@main
with
:
tool-cache
:
false
docker-images
:
false
android
:
true
dotnet
:
true
haskell
:
true
large-packages
:
true
swap-storage
:
false
-
name
:
Login to Docker Hub
uses
:
docker/login-action@v2
with
:
username
:
${{ secrets.DOCKERHUB_USERNAME }}
password
:
${{ secrets.DOCKERHUB_TOKEN }}
-
name
:
Build and Push Blackwell Image
run
:
|
docker buildx build --output type=image,compression=zstd . -f docker/Dockerfile.blackwell -t lmsysorg/sglang:blackwell --no-cache
docker push lmsysorg/sglang:blackwell
.github/workflows/release-docker-deepep.yml
deleted
100644 → 0
View file @
fadf18fd
name
:
Build DeepEP Docker Image
on
:
workflow_dispatch
:
schedule
:
-
cron
:
'
0
0
*
*
*'
jobs
:
build-dev
:
if
:
${{ github.repository == 'sgl-project/sglang' }}
runs-on
:
ubuntu-22.04
strategy
:
matrix
:
variant
:
-
base
:
lmsysorg/sglang:latest
tag
:
deepep
-
base
:
lmsysorg/sglang:dev
tag
:
dev-deepep
-
base
:
lmsysorg/sglang:blackwell
tag
:
blackwell-deepep
steps
:
-
name
:
Checkout repository
uses
:
actions/checkout@v4
-
name
:
Free disk space
uses
:
jlumbroso/free-disk-space@main
with
:
tool-cache
:
false
docker-images
:
false
android
:
true
dotnet
:
true
haskell
:
true
large-packages
:
true
swap-storage
:
false
-
name
:
Login to Docker Hub
uses
:
docker/login-action@v2
with
:
username
:
${{ secrets.DOCKERHUB_USERNAME }}
password
:
${{ secrets.DOCKERHUB_TOKEN }}
-
name
:
Build and Push Docker Image
run
:
|
docker build . -f docker/Dockerfile.deepep --build-arg BASE_IMAGE=${{ matrix.variant.base }} -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache
docker push lmsysorg/sglang:${{ matrix.variant.tag }}
.github/workflows/release-docker.yml
View file @
906dbc34
...
@@ -14,8 +14,13 @@ jobs:
...
@@ -14,8 +14,13 @@ jobs:
environment
:
'
prod'
environment
:
'
prod'
strategy
:
strategy
:
matrix
:
matrix
:
cuda_version
:
[
'
12.4.1'
]
cuda_version
:
[
'
12.6.1'
,
'
12.8.1'
]
build_type
:
[
'
all'
]
build_type
:
[
'
all'
,
'
blackwell'
]
exclude
:
-
cuda_version
:
'
12.6.1'
build_type
:
'
blackwell'
-
cuda_version
:
'
12.8.1'
build_type
:
'
all'
steps
:
steps
:
-
name
:
Delete huge unnecessary tools folder
-
name
:
Delete huge unnecessary tools folder
run
:
rm -rf /opt/hostedtoolcache
run
:
rm -rf /opt/hostedtoolcache
...
@@ -41,6 +46,10 @@ jobs:
...
@@ -41,6 +46,10 @@ jobs:
cuda_tag="cu124"
cuda_tag="cu124"
elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
cuda_tag="cu125"
cuda_tag="cu125"
elif [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then
cuda_tag="cu126"
elif [ "${{ matrix.cuda_version }}" = "12.8.1" ]; then
cuda_tag="cu128"
else
else
echo "Unsupported CUDA version"
echo "Unsupported CUDA version"
exit 1
exit 1
...
@@ -52,15 +61,17 @@ jobs:
...
@@ -52,15 +61,17 @@ jobs:
tag_suffix=""
tag_suffix=""
elif [ "${{ matrix.build_type }}" = "srt" ]; then
elif [ "${{ matrix.build_type }}" = "srt" ]; then
tag_suffix="-srt"
tag_suffix="-srt"
elif [ "${{ matrix.build_type }}" = "blackwell" ]; then
tag_suffix="-b200"
else
else
echo "Unsupported build type"
echo "Unsupported build type"
exit 1
exit 1
fi
fi
docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
docker build
x build --output type=image,compression=zstd
. -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
docker push lmsysorg/sglang:${tag}${tag_suffix}
docker push lmsysorg/sglang:${tag}${tag_suffix}
if [ "${{ matrix.cuda_version }}" = "12.
4
.1" ]; then
if [ "${{ matrix.cuda_version }}" = "12.
6
.1" ]; then
docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
docker push lmsysorg/sglang:latest${tag_suffix}
docker push lmsysorg/sglang:latest${tag_suffix}
fi
fi
docker/Dockerfile
View file @
906dbc34
ARG
CUDA_VERSION=12.4.1
ARG
CUDA_VERSION=12.6.1
FROM
nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
FROM
nvcr.io/nvidia/tritonserver:24.12-py3-min
ARG
BUILD_TYPE=all
ARG
BUILD_TYPE=all
ENV
DEBIAN_FRONTEND=noninteractive
ENV
DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
NVSHMEM_DIR=/sgl-workspace/nvshmem/install
# Set timezone and install all packages
RUN
echo
'tzdata tzdata/Areas select America'
| debconf-set-selections
\
RUN
echo
'tzdata tzdata/Areas select America'
| debconf-set-selections
\
&&
echo
'tzdata tzdata/Zones/America select Los_Angeles'
| debconf-set-selections
\
&&
echo
'tzdata tzdata/Zones/America select Los_Angeles'
| debconf-set-selections
\
&&
apt update
-y
\
&&
apt-get update
&&
apt-get
install
-y
--no-install-recommends
\
&&
apt
install
software-properties-common
-y
\
tzdata
\
&&
apt
install
python3 python3-pip
-y
\
software-properties-common netcat-openbsd kmod unzip openssh-server
\
&&
apt
install
curl git
sudo
libibverbs-dev
-y
\
curl wget lsof zsh ccache tmux htop git-lfs tree
\
&&
apt
install
rdma-core infiniband-diags openssh-server perftest
-y
\
python3 python3-pip python3-dev libpython3-dev
\
&&
python3
--version
\
build-essential cmake
\
&&
python3
-m
pip
--version
\
libopenmpi-dev libnuma1 libnuma-dev
\
&&
rm
-rf
/var/lib/apt/lists/
*
\
libibverbs-dev libibverbs1 libibumad3
\
&&
apt clean
librdmacm1 libnl-3-200 libnl-route-3-200 libnl-route-3-dev libnl-3-dev
\
ibverbs-providers infiniband-diags perftest
\
# For openbmb/MiniCPM models
libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev
\
RUN
pip3
install
datamodel_code_generator
--break-system-packages
libboost-all-dev libssl-dev
\
libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc
\
pybind11-dev
\
libhiredis-dev libcurl4-openssl-dev
\
libczmq4 libczmq-dev
\
libfabric-dev
\
patchelf
\
nvidia-dkms-550
\
devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev
\
&&
ln
-sf
/usr/bin/python3 /usr/bin/python
\
&&
rm
-rf
/var/lib/apt/lists/
*
\
&&
apt-get clean
# GDRCopy installation
RUN
mkdir
-p
/tmp/gdrcopy
&&
cd
/tmp
\
&&
git clone https://github.com/NVIDIA/gdrcopy.git
-b
v2.4.4
\
&&
cd
gdrcopy/packages
\
&&
CUDA
=
/usr/local/cuda ./build-deb-packages.sh
\
&&
dpkg
-i
gdrdrv-dkms_
*
.deb libgdrapi_
*
.deb gdrcopy-tests_
*
.deb gdrcopy_
*
.deb
\
&&
cd
/
&&
rm
-rf
/tmp/gdrcopy
# Fix DeepEP IBGDA symlink
RUN
ln
-sf
/usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
# Clone and install SGLang
WORKDIR
/sgl-workspace
WORKDIR
/sgl-workspace
RUN
python3
-m
pip
install
--no-cache-dir
--upgrade
pip setuptools wheel html5lib six
\
&&
git clone
--depth
=
1 https://github.com/sgl-project/sglang.git
\
&&
cd
sglang
\
&&
case
"
$CUDA_VERSION
"
in
\
12.6.1
)
CUINDEX
=
126
;;
\
12.8.1
)
CUINDEX
=
128
;;
\
*
)
echo
"Unsupported CUDA version:
$CUDA_VERSION
"
&&
exit
1
;;
\
esac
\
&&
python3
-m
pip
install
--no-cache-dir
-e
"python[
${
BUILD_TYPE
}
]"
--extra-index-url
https://download.pytorch.org/whl/cu
${
CUINDEX
}
\
&&
if
[
"
$CUDA_VERSION
"
=
"12.8.1"
]
;
then
\
python3
-m
pip
install
--no-cache-dir
nvidia-nccl-cu12
==
2.27.3
--force-reinstall
--no-deps
;
\
python3
-m
pip
install
--no-cache-dir
https://github.com/sgl-project/whl/releases/download/v0.1.9/sgl_kernel-0.1.9+cu128-cp39-abi3-manylinux2014_x86_64.whl
--force-reinstall
--no-deps
;
\
fi
# Build and install NVSHMEM + DeepEP
RUN
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
\
&&
git clone https://github.com/deepseek-ai/DeepEP.git
\
&&
tar
-xf
nvshmem_src_3.2.5-1.txz
&&
mv
nvshmem_src nvshmem
\
&&
cd
nvshmem
\
&&
git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch
\
&&
sed
-i
'1i#include <unistd.h>'
examples/moe_shuffle.cu
\
&&
rm
-f
/sgl-workspace/nvshmem_src_3.2.5-1.txz
\
&&
NVSHMEM_SHMEM_SUPPORT
=
0
\
NVSHMEM_UCX_SUPPORT
=
0
\
NVSHMEM_USE_NCCL
=
0
\
NVSHMEM_MPI_SUPPORT
=
0
\
NVSHMEM_IBGDA_SUPPORT
=
1
\
NVSHMEM_PMIX_SUPPORT
=
0
\
NVSHMEM_TIMEOUT_DEVICE_POLLING
=
0
\
NVSHMEM_USE_GDRCOPY
=
1
\
cmake
-S
.
-B
build/
-DCMAKE_INSTALL_PREFIX
=
${
NVSHMEM_DIR
}
-DCMAKE_CUDA_ARCHITECTURES
=
90
\
&&
cmake
--build
build
--target
install
-j
\
&&
cd
/sgl-workspace/DeepEP
\
&&
NVSHMEM_DIR
=
${
NVSHMEM_DIR
}
pip
install
.
ARG
CUDA_VERSION
# Python tools
RUN
python3
-m
pip
install
--upgrade
pip setuptools wheel html5lib six
--break-system-packages
--ignore-installed
\
RUN
python3
-m
pip
install
--no-cache-dir
\
&&
git clone
--depth
=
1 https://github.com/sgl-project/sglang.git
\
datamodel_code_generator
\
&&
if
[
"
$CUDA_VERSION
"
=
"12.1.1"
]
;
then
\
mooncake_transfer_engine
==
0.3.3.post2
\
export
CUINDEX
=
121
;
\
pre-commit
\
elif
[
"
$CUDA_VERSION
"
=
"12.4.1"
]
;
then
\
pytest
\
export
CUINDEX
=
124
;
\
black
\
elif
[
"
$CUDA_VERSION
"
=
"12.8.1"
]
;
then
\
isort
\
export
CUINDEX
=
124
;
\
icdiff
\
elif
[
"
$CUDA_VERSION
"
=
"11.8.0"
]
;
then
\
uv
\
export
CUINDEX
=
118
;
\
wheel
\
python3
-m
pip
install
--no-cache-dir
sgl-kernel
-i
https://docs.sglang.ai/whl/cu118
--break-system-packages
;
\
scikit-build-core
else
\
echo
"Unsupported CUDA version:
$CUDA_VERSION
"
&&
exit
1
;
\
fi
\
&&
if
[
"
$CUDA_VERSION
"
=
"12.4.1"
]
;
then
\
python3
-m
pip
install
--no-cache-dir
torch
--index-url
https://download.pytorch.org/whl/cu126
--break-system-packages
;
\
else
\
python3
-m
pip
install
--no-cache-dir
torch
--index-url
https://download.pytorch.org/whl/cu
${
CUINDEX
}
--break-system-packages
;
\
fi
\
&&
cd
sglang
\
&&
python3
-m
pip
--no-cache-dir
install
-e
"python[
${
BUILD_TYPE
}
]"
--break-system-packages
\
&&
if
[
"
$CUDA_VERSION
"
=
"12.8.1"
]
;
then
\
python3
-m
pip
install
nvidia-nccl-cu12
==
2.26.2.post1
--force-reinstall
--no-deps
--break-system-packages
;
\
fi
ENV
DEBIAN_FRONTEND=interactive
ENV
DEBIAN_FRONTEND=interactive
docker/Dockerfile.blackwell
deleted
100644 → 0
View file @
fadf18fd
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /sgl-workspace
# Preseed the tzdata answers, then install the base toolchain in a single
# apt-get transaction.
# - apt-get is used instead of apt: apt warns that its CLI is not stable for
#   use in scripts.
# - wget is listed explicitly because the CMake download step later in this
#   Dockerfile calls it.
# NOTE(review): vim is configured further down (.vimrc, git core.editor) but is
# not installed here -- confirm whether it is expected to come from elsewhere.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        software-properties-common \
        python3 python3-pip \
        curl wget git sudo libibverbs-dev \
        rdma-core infiniband-diags openssh-server perftest libnuma1 \
        lsof zsh ccache tmux htop git-lfs tree unzip \
    && python3 --version \
    && python3 -m pip --version \
    && pip3 install --upgrade pip \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean
# Install the prebuilt sgl_kernel 0.1.9 wheel for CUDA 12.8 from the GitHub
# release page, plus the build helpers (setuptools is pinned to 75.0.0).
RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.9/sgl_kernel-0.1.9+cu128-cp39-abi3-manylinux2014_x86_64.whl \
&& pip3 install setuptools==75.0.0 wheel scikit-build-core
# Shallow-clone sglang and install it in editable mode with the "blackwell"
# extra, resolving extra wheels from the PyTorch cu128 index.
# NOTE(review): the clone tracks the default branch, so the image content
# depends on when it is built.
RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \
&& cd sglang && pip3 install -e "python[blackwell]" --extra-index-url https://download.pytorch.org/whl/cu128
# Force NCCL to 2.27.3 after the sglang install; --no-deps leaves every other
# installed package untouched.
RUN pip3 install nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps
ENV DEBIAN_FRONTEND=interactive
# Developer tooling (test runner, formatters, diff viewer, uv, pre-commit);
# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir pytest black isort icdiff uv pre-commit
# All curl downloads below pass -f (--fail): without it an HTTP error (404,
# 5xx) still exits 0 and the error page is saved as the "binary", so the build
# would succeed with a broken tool.

# Install diff-so-fancy
RUN curl -fLSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
    && chmod +x /usr/local/bin/diff-so-fancy
# Install clang-format
RUN curl -fLSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
    && chmod +x /usr/local/bin/clang-format
# Install clangd (binaries and bundled lib directory), then remove the archive
RUN curl -fL https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
    && unzip clangd.zip \
    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
    && rm -rf clangd_18.1.3 clangd.zip
# Install CMake 3.31.1 from the upstream release tarball into /usr/local
RUN wget https://github.com/Kitware/CMake/releases/download/v3.31.1/cmake-3.31.1-linux-x86_64.tar.gz \
    && tar -xzf cmake-3.31.1-linux-x86_64.tar.gz \
    && cp -r cmake-3.31.1-linux-x86_64/bin/* /usr/local/bin/ \
    && cp -r cmake-3.31.1-linux-x86_64/share/* /usr/local/share/ \
    && rm -rf cmake-3.31.1-linux-x86_64 cmake-3.31.1-linux-x86_64.tar.gz
# Add yank script
COPY --chown=root:root <<-"EOF" /usr/local/bin/yank
#!/bin/bash
# Write a terminal escape sequence to stdout.
# Arguments: $1 - the sequence, written with printf-style escapes (\033, \a)
# When running under tmux ($TMUX set) or a screen-type terminal ($TERM starts
# with "screen"), the sequence is wrapped in the "\033Ptmux;\033 ... \033\\"
# passthrough envelope first.
# An empty or unset TERM is NOT treated as screen (the previous
# `-z "${TERM##screen*}"` test matched it by accident).
put() {
  local esc=$1
  if [[ -n "${TMUX:-}" || "${TERM:-}" == screen* ]]; then
    esc="\033Ptmux;\033${esc}\033\\"
  fi
  # The sequence is intentionally used as the printf format so that its
  # backslash escapes are interpreted.
  # shellcheck disable=SC2059
  printf "$esc"
}
# Send an OSC 52 clipboard sequence whose payload is "!" before the real data
# (presumably to reset the terminal's clipboard -- confirm against the
# upstream yank script).
put "\033]52;c;!\a"
# Read everything to copy: the files named as arguments, or stdin if none.
buf=$( cat "$@" )
# Byte length of the input, and the cap applied to it below
# (NOTE(review): 74994 looks like a terminal escape-length limit -- confirm).
len=$( printf %s "$buf" | wc -c ) max=74994
# Warn on stderr when the input exceeds the cap; it is truncated, not rejected.
test $len -gt $max && echo "$0: input is $(( len - max )) bytes too long" >&2
# Emit the (possibly truncated) input, base64-encoded on one line, via OSC 52.
put "\033]52;c;$( printf %s "$buf" | head -c $max | base64 | tr -d '\r\n' )\a"
# Inside tmux, also store the full text in a tmux buffer; `||:` makes the
# script exit 0 whether or not that step ran or succeeded.
test -n "$TMUX" && tmux set-buffer "$buf" ||:
EOF
RUN chmod +x /usr/local/bin/yank
# Install oh-my-zsh and plugins
# Runs the upstream installer non-interactively (--unattended), then clones the
# zsh-autosuggestions and zsh-syntax-highlighting plugins into the custom
# plugin directory ($ZSH_CUSTOM, defaulting to ~/.oh-my-zsh/custom).
# NOTE(review): the installer and both plugins are fetched from their default
# branches, so their content is not pinned to a version.
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
&& git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
&& git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
# Configure Vim
COPY --chown=root:root <<-"EOF" /root/.vimrc
function! Yank(text) abort
let escape = system('yank', a:text)
if v:shell_error
echoerr escape
else
call writefile([escape], '/dev/tty', 'b')
endif
endfunction
noremap <silent> <Leader>y y:<C-U>call Yank(@0)<CR>
" automatically run yank(1) whenever yanking in Vim
function! CopyYank() abort
call Yank(join(v:event.regcontents, "\n"))
endfunction
autocmd TextYankPost * call CopyYank()
" Basic settings
set number
syntax on
set mouse=a
filetype indent on
" Indentation
set autoindent nosmartindent
set smarttab
set expandtab
set shiftwidth=4
set softtabstop=4
" Visual guides
set colorcolumn=120
highlight ColorColumn ctermbg=5
" Status line
set laststatus=2
set statusline=%<%f\ %h%m%r%=%{\"[\".(&fenc==\"\"?&enc:&fenc).((exists(\"+bomb\")\ &&\ &bomb)?\",B\":\"\").\"]\ \"}%k\ %-14.(%l,%c%V%)\ %P
" Backspace behavior
set backspace=2
" Encoding
set encoding=utf-8
set fileencoding=utf-8
EOF
# Configure tmux
COPY --chown=root:root <<-"EOF" /root/.tmux.conf
# Pane border styling
set -g pane-border-style fg='#742727',bg=black
set -g pane-active-border-style fg=red,bg=black
# Status bar styling
set -g status-style bg='#0C8A92',fg=black
# Change prefix key to backtick
set-option -g prefix `
unbind C-b
bind-key ` send-prefix
# Split panes using - and = with current path
unbind '"'
bind - splitw -v -c '#{pane_current_path}'
unbind '%'
bind = splitw -h -c '#{pane_current_path}'
# Vi mode settings
bind-key -T copy-mode-vi Y send-keys -X copy-pipe 'yank > #{pane_tty}'
set-window-option -g mode-keys vi
# Other settings
set-option -g escape-time 0
set-option -g base-index 1
set-window-option -g mouse on
EOF
# Configure Git
# Sets global defaults for the image's root user: vim as editor, whitespace
# checks, diff-so-fancy as pager, diff colours, an `lg` log alias, and
# rebase-on-pull.
# NOTE(review): `http.sslVerify false` turns off TLS certificate verification
# for every HTTPS git operation in the container. Confirm this is intentional;
# if not, drop that line.
RUN git config --global core.editor "vim" \
&& git config --global core.whitespace "fix,-indent-with-non-tab,trailing-space,cr-at-eol" \
&& git config --global core.pager "diff-so-fancy | less --tabs=4 -RFX" \
&& git config --global color.ui true \
&& git config --global color."diff-highlight".oldNormal "red bold" \
&& git config --global color."diff-highlight".oldHighlight "red bold 52" \
&& git config --global color."diff-highlight".newNormal "green bold" \
&& git config --global color."diff-highlight".newHighlight "green bold 22" \
&& git config --global color.diff.meta "11" \
&& git config --global color.diff.frag "magenta bold" \
&& git config --global color.diff.commit "yellow bold" \
&& git config --global color.diff.old "red bold" \
&& git config --global color.diff.new "green bold" \
&& git config --global color.diff.whitespace "red reverse" \
&& git config --global alias.lg "log --color --graph --pretty=format:'%Cred%h%Creset - %s %Cgreen(%cr) %C(bold blue)<%an>%Creset%C(auto)%d%Creset' --abbrev-commit --" \
&& git config --global http.sslVerify false \
&& git config --global pull.rebase true
# Configure zsh
COPY --chown=root:root <<-"EOF" /root/.zshrc
export ZSH="/root/.oh-my-zsh"
# Theme
ZSH_THEME="robbyrussell"
# Plugins
plugins=(
git
z
zsh-autosuggestions
zsh-syntax-highlighting
)
source $ZSH/oh-my-zsh.sh
# Aliases
alias ll='ls -alF'
alias la='ls -A'
alias l='ls -CF'
alias vi='vim'
# Enhanced history
HISTSIZE=10000
SAVEHIST=10000
setopt HIST_IGNORE_ALL_DUPS
setopt HIST_FIND_NO_DUPS
setopt INC_APPEND_HISTORY
EOF
# Set workspace directory
WORKDIR /sgl-workspace/sglang
docker/Dockerfile.deepep
deleted
100644 → 0
View file @
fadf18fd
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# Deps
# System packages needed to build GDRCopy, NVSHMEM and DeepEP on top of
# ${BASE_IMAGE}. The list is de-duplicated (six packages were listed twice)
# and sorted so additions are easy to review; the installed set is unchanged.
# `ln -sf` is used so the step does not fail when the base image already
# provides /usr/bin/python.
RUN apt-get update && apt-get install -y \
        build-essential \
        ccache \
        check \
        cmake \
        curl \
        debhelper \
        devscripts \
        dkms \
        fakeroot \
        ibverbs-providers \
        infiniband-diags \
        kmod \
        libboost-all-dev \
        libcurl4-openssl-dev \
        libczmq-dev \
        libczmq4 \
        libfabric-dev \
        libgoogle-glog-dev \
        libgrpc++-dev \
        libgrpc-dev \
        libgtest-dev \
        libhiredis-dev \
        libibumad3 \
        libibverbs-dev \
        libibverbs1 \
        libjsoncpp-dev \
        libnl-3-200 \
        libnl-3-dev \
        libnl-route-3-200 \
        libnl-route-3-dev \
        libnuma-dev \
        libopenmpi-dev \
        libprotobuf-dev \
        libpython3-dev \
        librdmacm1 \
        libssl-dev \
        libsubunit-dev \
        libsubunit0 \
        libunwind-dev \
        netcat-openbsd \
        nvidia-dkms-535 \
        openssh-server \
        patchelf \
        perftest \
        pkg-config \
        protobuf-compiler-grpc \
        pybind11-dev \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3 /usr/bin/python
# CMake
# Download the upstream CMake 3.27.4 self-extracting installer, run it
# non-interactively into /usr/local, and delete the installer afterwards.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \
&& chmod +x cmake-3.27.4-linux-x86_64.sh \
&& ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \
&& rm cmake-3.27.4-linux-x86_64.sh
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
# GDRCopy
# Build the Debian packages from the v2.4.4 tag and install them in the
# original order, then delete the clone and build output so they are not kept
# in the image layer. (/tmp always exists, so no mkdir is needed.)
RUN cd /tmp \
    && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb \
    && dpkg -i libgdrapi_*.deb \
    && dpkg -i gdrcopy-tests_*.deb \
    && dpkg -i gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy
# IBGDA dependency
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
# DeepEP
WORKDIR /sgl-workspace
RUN git clone https://github.com/deepseek-ai/DeepEP.git
# NVSHMEM
# Download, unpack and patch the 3.2.5 sources in one layer so the tarball can
# be removed in that same layer and does not stay in the image. The DeepEP
# patch is applied, and <unistd.h> is prepended to examples/moe_shuffle.cu.
# (The former `cat` of the patched file only dumped it into the build log and
# has been dropped.)
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
    && tar -xf nvshmem_src_3.2.5-1.txz \
    && rm -f nvshmem_src_3.2.5-1.txz \
    && mv nvshmem_src nvshmem \
    && cd /sgl-workspace/nvshmem \
    && git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \
    && sed -i '1i#include <unistd.h>' /sgl-workspace/nvshmem/examples/moe_shuffle.cu
# Compile NVSHMEM
# Configure with IBGDA and GDRCopy support switched on and the SHMEM, UCX,
# NCCL, MPI and PMIx options switched off, then build and install into
# /sgl-workspace/nvshmem/install.
# NOTE(review): CMAKE_CUDA_ARCHITECTURES is fixed at 90 for every base image
# this file is built on -- confirm that is intended for the blackwell variant.
# NOTE(review): `make -j` with no number places no limit on parallel jobs.
ENV CUDA_HOME=/usr/local/cuda
RUN cd /sgl-workspace/nvshmem && NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
&& cd build \
&& make install -j
# Build and install DeepEP from its checkout against the NVSHMEM install tree.
# NVSHMEM_DIR is exported as an image-wide ENV and also passed inline to pip.
WORKDIR /sgl-workspace/DeepEP
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install --break-system-packages .
# Install mooncake transfer engine
# NOTE(review): --upgrade with no version pin installs whatever release is
# newest at build time.
RUN pip install --upgrade mooncake_transfer_engine --break-system-packages
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment