Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b419e20c
Unverified
Commit
b419e20c
authored
Nov 05, 2025
by
Yingchun Lai
Committed by
GitHub
Nov 04, 2025
Browse files
[Dockerfile] Speed up docker image building (#8784)
parent
48641435
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
24 deletions
+38
-24
docker/Dockerfile
docker/Dockerfile
+38
-24
No files found.
docker/Dockerfile
View file @
b419e20c
...
...
@@ -9,10 +9,13 @@ ARG GRACE_BLACKWELL=0
ARG
GRACE_BLACKWELL_DEEPEP_BRANCH=gb200_blog_part_2
ARG
DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG
TRITON_LANG_COMMIT=4caa0328bf8df64896dd5f6fb9df41b0eb2e750a
ARG
BUILD_AND_DOWNLOAD_PARALLEL=8
ARG
SGL_KERNEL_VERSION=0.3.16.post5
ARG
GDRCOPY_VERSION=2.5.1
ARG
NVSHMEM_VERSION=3.4.5
ARG
PIP_DEFAULT_INDEX
ARG
UBUNTU_MIRROR
ARG
GITHUB_ARTIFACTORY=github.com
ENV
DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
...
...
@@ -22,9 +25,15 @@ ENV DEBIAN_FRONTEND=noninteractive \
ENV
PATH="${PATH}:/usr/local/nvidia/bin" \
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
# Point apt at an alternative Ubuntu mirror when the UBUNTU_MIRROR build arg is non-empty.
# Both the archive and the security hosts listed in /etc/apt/sources.list are rewritten;
# with an empty UBUNTU_MIRROR the file is left as it is.
RUN if [ -n "$UBUNTU_MIRROR" ]; then \
        sed -i \
            -e "s|http://.*archive.ubuntu.com|$UBUNTU_MIRROR|g" \
            -e "s|http://.*security.ubuntu.com|$UBUNTU_MIRROR|g" \
            /etc/apt/sources.list; \
    fi
RUN
--mount
=
type
=
cache,target
=
/var/cache/apt apt update
&&
apt
install
wget
-y
&&
apt
install
software-properties-common
-y
\
&&
add-apt-repository ppa:deadsnakes/ppa
-y
\
&&
apt
install
python3.12-full python3.12-dev python3.10-venv
-y
\
&&
apt
install
python3.12-full python3.12-dev python3.10-venv
-y
\
&&
update-alternatives
--install
/usr/bin/python3 python3 /usr/bin/python3.10 1
\
&&
update-alternatives
--install
/usr/bin/python3 python3 /usr/bin/python3.12 2
\
&&
update-alternatives
--set
python3 /usr/bin/python3.12
\
...
...
@@ -57,10 +66,16 @@ RUN --mount=type=cache,target=/var/cache/apt echo 'tzdata tzdata/Areas select Am
&&
rm
-rf
/var/lib/apt/lists/
*
\
&&
apt-get clean
# Use an alternative pip package index when the PIP_DEFAULT_INDEX build arg is non-empty.
# This sets pip's global.index-url (the index, not the cache); with an empty
# PIP_DEFAULT_INDEX pip's configuration is left untouched.
# The value is quoted so it reaches pip as a single argument, with no word
# splitting or glob expansion by the shell.
RUN if [ -n "${PIP_DEFAULT_INDEX}" ]; then \
        python3 -m pip config set global.index-url "${PIP_DEFAULT_INDEX}"; \
    fi
# GDRCopy installation
RUN
mkdir
-p
/tmp/gdrcopy
&&
cd
/tmp
\
&&
git clone https://github.com/NVIDIA/gdrcopy.git
-b
v
${
GDRCOPY_VERSION
}
\
&&
cd
gdrcopy/packages
\
&&
wget
-q
https://
${
GITHUB_ARTIFACTORY
}
/NVIDIA/gdrcopy/archive/refs/tags/v
${
GDRCOPY_VERSION
}
.tar.gz
\
&&
tar
-xzf
v
${
GDRCOPY_VERSION
}
.tar.gz
&&
rm
v
${
GDRCOPY_VERSION
}
.tar.gz
\
&&
cd
gdrcopy-
${
GDRCOPY_VERSION
}
/packages
\
&&
CUDA
=
/usr/local/cuda ./build-deb-packages.sh
\
&&
dpkg
-i
gdrdrv-dkms_
*
.deb libgdrapi_
*
.deb gdrcopy-tests_
*
.deb gdrcopy_
*
.deb
\
&&
cd
/
&&
rm
-rf
/tmp/gdrcopy
...
...
@@ -93,7 +108,7 @@ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade
*
)
echo
"Unsupported CUDA version:
$CUDA_VERSION
"
&&
exit
1
;;
\
esac
\
&&
if
[
"
$CUDA_VERSION
"
=
"12.6.1"
]
;
then
\
python3
-m
pip
install
https://
github.com
/sgl-project/whl/releases/download/v
${
SGL_KERNEL_VERSION
}
/sgl_kernel-
${
SGL_KERNEL_VERSION
}
+cu124-cp310-abi3-manylinux2014_
$(
uname
-m
)
.whl
--force-reinstall
--no-deps
\
python3
-m
pip
install
https://
${
GITHUB_ARTIFACTORY
}
/sgl-project/whl/releases/download/v
${
SGL_KERNEL_VERSION
}
/sgl_kernel-
${
SGL_KERNEL_VERSION
}
+cu124-cp310-abi3-manylinux2014_
$(
uname
-m
)
.whl
--force-reinstall
--no-deps
\
;
\
elif
[
"
$CUDA_VERSION
"
=
"12.8.1"
]
||
[
"
$CUDA_VERSION
"
=
"12.9.1"
]
;
then
\
python3
-m
pip
install
sgl-kernel
==
${
SGL_KERNEL_VERSION
}
\
...
...
@@ -115,16 +130,16 @@ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade
else
\
echo
"No NCCL mapping for CUDA_VERSION=
${
CUDA_VERSION
}
"
&&
exit
1
;
\
fi
\
&&
FLASHINFER_LOGGING_LEVEL
=
warning python3
-m
flashinfer
--download-cubin
&&
FLASHINFER_CUBIN_DOWNLOAD_THREADS
=
${
BUILD_AND_DOWNLOAD_PARALLEL
}
FLASHINFER_LOGGING_LEVEL
=
warning python3
-m
flashinfer
--download-cubin
# Download NVSHMEM source files
# We use Tom's DeepEP fork for GB200 for now; the 1fd57b0276311d035d16176bb0076426166e52f3 commit is https://github.com/fzyzcjy/DeepEP/tree/gb200_blog_part_2
RUN
set
-eux
;
\
if
[
"
${
CUDA_VERSION
%%.*
}
"
=
"13"
]
;
then
\
wget
"
https://
github.com
/NVIDIA/nvshmem/releases/download/v
${
NVSHMEM_VERSION
}
-0/nvshmem_src_cuda-all-all-
${
NVSHMEM_VERSION
}
.tar.gz
"
;
\
wget
-q
https://
${
GITHUB_ARTIFACTORY
}
/NVIDIA/nvshmem/releases/download/v
${
NVSHMEM_VERSION
}
-0
/nvshmem_src_cuda-all-all-
${
NVSHMEM_VERSION
}
.tar.gz
;
\
NVSHMEM_TARBALL
=
"nvshmem_src_cuda-all-all-
${
NVSHMEM_VERSION
}
.tar.gz"
;
\
else
\
wget
"
https://developer.download.nvidia.com/compute/redist/nvshmem/
${
NVSHMEM_VERSION
}
/source/nvshmem_src_cuda12-all-all-
${
NVSHMEM_VERSION
}
.tar.gz
"
;
\
wget
-q
https://developer.download.nvidia.com/compute/redist/nvshmem/
${
NVSHMEM_VERSION
}
/source/nvshmem_src_cuda12-all-all-
${
NVSHMEM_VERSION
}
.tar.gz
;
\
NVSHMEM_TARBALL
=
"nvshmem_src_cuda12-all-all-
${
NVSHMEM_VERSION
}
.tar.gz"
;
\
fi
&&
\
if
[
"
$GRACE_BLACKWELL
"
=
"1"
]
;
then
\
...
...
@@ -134,9 +149,8 @@ RUN set -eux; \
sed
-i
's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/'
csrc/kernels/configs.cuh
&&
\
cd
..
;
\
else
\
git clone https://github.com/deepseek-ai/DeepEP.git
&&
\
cd
DeepEP
&&
\
git checkout
"
${
DEEPEP_COMMIT
}
"
&&
\
wget
-q
https://
${
GITHUB_ARTIFACTORY
}
/deepseek-ai/DeepEP/archive/
${
DEEPEP_COMMIT
}
.zip
&&
\
unzip
${
DEEPEP_COMMIT
}
.zip
&&
rm
${
DEEPEP_COMMIT
}
.zip
&&
mv
DeepEP-
${
DEEPEP_COMMIT
}
DeepEP
&&
cd
DeepEP
&&
\
sed
-i
's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/'
csrc/kernels/configs.cuh
&&
\
cd
..
;
\
fi
&&
\
...
...
@@ -156,7 +170,7 @@ RUN cd /sgl-workspace/nvshmem && \
NVSHMEM_TIMEOUT_DEVICE_POLLING
=
0
\
NVSHMEM_USE_GDRCOPY
=
1
\
cmake
-S
.
-B
build/
-DCMAKE_INSTALL_PREFIX
=
${
NVSHMEM_DIR
}
-DCMAKE_CUDA_ARCHITECTURES
=
${
CUDA_ARCH
}
&&
\
cmake
--build
build
--target
install
-j
${
CMAKE_BUILD_PARALLEL_LEV
EL
}
cmake
--build
build
--target
install
-j
${
BUILD_AND_DOWNLOAD_PARALL
EL
}
# Install DeepEP
# CTK13 requires the cccl include
...
...
@@ -175,22 +189,20 @@ RUN --mount=type=cache,target=/root/.cache/pip cd /sgl-workspace/DeepEP && \
if
[
"
${
CUDA_VERSION
%%.*
}
"
=
"13"
]
;
then
\
sed
-i
"/^ include_dirs =
\[
'csrc
\/
'
\]
/a
\
include_dirs.append('
${
CUDA_HOME
}
/include/cccl')"
setup.py
;
\
fi
&&
\
NVSHMEM_DIR
=
${
NVSHMEM_DIR
}
TORCH_CUDA_ARCH_LIST
=
"
${
CHOSEN_TORCH_CUDA_ARCH_LIST
}
"
pip
install
--no-build-isolation
.
NVSHMEM_DIR
=
${
NVSHMEM_DIR
}
TORCH_CUDA_ARCH_LIST
=
"
${
CHOSEN_TORCH_CUDA_ARCH_LIST
}
"
MAX_JOBS
=
${
BUILD_AND_DOWNLOAD_PARALLEL
}
pip
install
--no-build-isolation
.
# To use flashinfer_cutedsl without IMA for WideEP configs, we must install the
# latest flashinfer_cutedsl. Remove this once 0.4.3 is officially released.
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip python3
-m
pip
install
--upgrade
--pre
"nvidia-cutlass-dsl==4.3.0.dev0"
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip python3
-m
pip
install
--upgrade
--pre
"nvidia-cutlass-dsl==4.3.0.dev0"
--extra-index-url
https://pypi.org/simple/
# For cuda 13, we install triton from source to fix some sm103 issues
# This can be reverted after >3.4.5 is released
# See the conversation in: https://github.com/triton-lang/triton/pull/8536
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
if
[
"
$CUDA_VERSION
"
=
"13.0.1"
]
;
then
\
git clone https://github.com/triton-lang/triton.git
&&
\
cd
triton
&&
\
git checkout
${
TRITON_LANG_COMMIT
}
&&
\
wget
-q
https://
${
GITHUB_ARTIFACTORY
}
/triton-lang/triton/archive/
${
TRITON_LANG_COMMIT
}
.zip
&&
\
unzip
-q
${
TRITON_LANG_COMMIT
}
.zip
&&
rm
${
TRITON_LANG_COMMIT
}
.zip
&&
mv
triton-
${
TRITON_LANG_COMMIT
}
triton
&&
\
pip
install
--break-system-packages
-r
python/requirements.txt
&&
\
MAX_JOBS
=
20
pip
install
--break-system-packages
-e
.
;
\
MAX_JOBS
=
${
BUILD_AND_DOWNLOAD_PARALLEL
}
pip
install
--break-system-packages
-e
.
;
\
fi
# Python tools
...
...
@@ -262,15 +274,15 @@ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --break-sy
tabulate
# Install diff-so-fancy
RUN
curl
-LSso
/usr/local/bin/diff-so-fancy https://
github.com
/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy
\
RUN
curl
-LSso
/usr/local/bin/diff-so-fancy https://
${
GITHUB_ARTIFACTORY
}
/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy
\
&&
chmod
+x /usr/local/bin/diff-so-fancy
# Install clang-format
RUN
curl
-LSso
/usr/local/bin/clang-format https://
github.com
/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64
\
RUN
curl
-LSso
/usr/local/bin/clang-format https://
${
GITHUB_ARTIFACTORY
}
/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64
\
&&
chmod
+x /usr/local/bin/clang-format
# Install clangd
RUN
curl
-L
https://
github.com
/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip
-o
clangd.zip
\
RUN
curl
-L
https://
${
GITHUB_ARTIFACTORY
}
/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip
-o
clangd.zip
\
&&
unzip clangd.zip
\
&&
cp
-r
clangd_18.1.3/bin/
*
/usr/local/bin/
\
&&
cp
-r
clangd_18.1.3/lib/
*
/usr/local/lib/
\
...
...
@@ -280,7 +292,7 @@ RUN curl -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-lin
RUN
CMAKE_VERSION
=
3.31.1
\
&&
ARCH
=
$(
uname
-m
)
\
&&
CMAKE_INSTALLER
=
"cmake-
${
CMAKE_VERSION
}
-linux-
${
ARCH
}
"
\
&&
wget
"https://
github.com
/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/
${
CMAKE_INSTALLER
}
.tar.gz"
\
&&
wget
-q
"https://
${
GITHUB_ARTIFACTORY
}
/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/
${
CMAKE_INSTALLER
}
.tar.gz"
\
&&
tar
-xzf
"
${
CMAKE_INSTALLER
}
.tar.gz"
\
&&
cp
-r
"
${
CMAKE_INSTALLER
}
/bin/"
*
/usr/local/bin/
\
&&
cp
-r
"
${
CMAKE_INSTALLER
}
/share/"
*
/usr/local/share/
\
...
...
@@ -317,7 +329,9 @@ RUN cat /tmp/.gitconfig >> /root/.gitconfig && rm /tmp/.gitconfig
COPY
docker/configs/.zshrc /root/.zshrc
RUN
set
-euxo
;
\
curl
--proto
'=https'
--tlsv1
.2
-sSf
https://just.systems/install.sh | bash
-s
--
--to
/usr/local/bin
curl
--proto
'=https'
--tlsv1
.2
-sSf
https://just.systems/install.sh |
\
sed
"s|https://github.com|https://
${
GITHUB_ARTIFACTORY
}
|g"
|
\
bash
-s
--
--tag
1.42.4
--to
/usr/local/bin
# Set workspace directory
WORKDIR
/sgl-workspace/sglang
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment