Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
27a223ab
Unverified
Commit
27a223ab
authored
Oct 19, 2025
by
Kangyan-Zhou
Committed by
GitHub
Oct 19, 2025
Browse files
Improve Kernel Build Time (#11508)
parent
53529f46
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
306 additions
and
4 deletions
+306
-4
sgl-kernel/build.sh
sgl-kernel/build.sh
+156
-4
sgl-kernel/kernel-runner-setup.sh
sgl-kernel/kernel-runner-setup.sh
+150
-0
No files found.
sgl-kernel/build.sh
View file @
27a223ab
...
...
@@ -31,11 +31,35 @@ else
TORCH_INSTALL
=
"pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu126"
fi
# Persistent build caches are rooted in the invoking user's home directory
# (not the workspace) so they outlive workspace cleanups and fresh checkouts.
CACHE_DIR="${HOME}/.cache/sgl-kernel"
CMAKE_DOWNLOAD_CACHE="${CACHE_DIR}/cmake-downloads"
CCACHE_DIR="${CACHE_DIR}/ccache"

# Make sure both cache locations exist on the host before they are used.
for cache_path in "${CMAKE_DOWNLOAD_CACHE}" "${CCACHE_DIR}"; do
  mkdir -p "${cache_path}"
done

# Report the resolved cache locations (followed by one blank line).
cat <<EOF
===================================
Cache Configuration
===================================
CMake download cache: ${CMAKE_DOWNLOAD_CACHE}
ccache directory: ${CCACHE_DIR}

EOF
docker run
--rm
\
-v
$(
pwd
)
:/sgl-kernel
\
-v
${
CMAKE_DOWNLOAD_CACHE
}
:/cmake-downloads
\
-v
${
CCACHE_DIR
}
:/ccache
\
-e
ENABLE_CMAKE_PROFILE
=
"
${
ENABLE_CMAKE_PROFILE
:-}
"
\
-e
ENABLE_BUILD_PROFILE
=
"
${
ENABLE_BUILD_PROFILE
:-}
"
\
${
DOCKER_IMAGE
}
\
bash
-c
"
# Install CMake (version >= 3.26) - Robust Installation
set -e
# Install CMake (version >= 3.26) - Robust Installation with caching
echo
\"
==================================
\"
echo
\"
Installing CMake
\"
echo
\"
==================================
\"
export CMAKE_VERSION_MAJOR=3.31
export CMAKE_VERSION_MINOR=1
# Setting these flags to reduce OOM chance only on ARM
...
...
@@ -45,10 +69,23 @@ docker run --rm \
export MAKEFLAGS='-j2'
export CMAKE_BUILD_PARALLEL_LEVEL=2
export NINJAFLAGS='-j2'
echo
\"
ARM detected: Using extra conservative settings (2 parallel jobs)
\"
fi
CMAKE_TARBALL=
\"
cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
\"
# Check if CMake is already cached
if [ -f
\"
/cmake-downloads/
\$
{CMAKE_TARBALL}
\"
]; then
echo
\"
Using cached CMake from /cmake-downloads/
\$
{CMAKE_TARBALL}
\"
cp /cmake-downloads/
\$
{CMAKE_TARBALL} .
else
echo
\"
Downloading CMake from: https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/
\$
{CMAKE_TARBALL}
\"
wget https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/
\$
{CMAKE_TARBALL}
# Cache the downloaded file
cp
\$
{CMAKE_TARBALL} /cmake-downloads/
fi
echo
\"
Downloading CMake from: https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
\"
wget https://cmake.org/files/v
\$
{CMAKE_VERSION_MAJOR}/cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
tar -xzf cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
.tar.gz
tar -xzf
\$
{CMAKE_TARBALL}
mv cmake-
\$
{CMAKE_VERSION_MAJOR}.
\$
{CMAKE_VERSION_MINOR}-linux-
${
ARCH
}
/opt/cmake
export PATH=/opt/cmake/bin:
\$
PATH
export LD_LIBRARY_PATH=/lib64:
\$
LD_LIBRARY_PATH
...
...
@@ -58,6 +95,50 @@ docker run --rm \
which cmake
cmake --version
echo
\"
==================================
\"
echo
\"
Installing and configuring ccache
\"
echo
\"
==================================
\"
# Install ccache 4.12.1 from source for CUDA support (yum provides old 3.7.7)
echo
\"
Installing ccache 4.12.1 from source...
\"
# Install build dependencies
yum install -y gcc gcc-c++ make wget tar
# Download and build ccache 4.12.1
cd /tmp
wget -q https://github.com/ccache/ccache/releases/download/v4.12.1/ccache-4.12.1.tar.xz
tar -xf ccache-4.12.1.tar.xz
cd ccache-4.12.1
# Build and install (uses already-installed CMake 3.31)
mkdir build && cd build
/opt/cmake/bin/cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr .. >/dev/null
make -j
\$
(nproc) >/dev/null
make install >/dev/null
# Verify installation
ccache --version
echo
\"
ccache 4.12.1 installed successfully
\"
cd /sgl-kernel
# Configure ccache
export CCACHE_DIR=/ccache
export CCACHE_BASEDIR=/sgl-kernel
export CCACHE_MAXSIZE=10G
export CCACHE_COMPILERCHECK=content
export CCACHE_COMPRESS=true
export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime
# Set up ccache as compiler launcher (don't use PATH to avoid -ccbin conflicts)
export CMAKE_C_COMPILER_LAUNCHER=ccache
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export CMAKE_CUDA_COMPILER_LAUNCHER=ccache
# Show ccache stats before build
ccache -sV || true
echo
\"\"
yum install numactl-devel -y &&
\
yum install libibverbs -y --nogpgcheck &&
\
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so &&
\
...
...
@@ -70,6 +151,77 @@ docker run --rm \
cd /sgl-kernel &&
\
ls -la
${
PYTHON_ROOT_PATH
}
/lib/python
${
PYTHON_VERSION
}
/site-packages/wheel/ &&
\
# Enable CMake profiling if requested
if [ -n
\"
${
ENABLE_CMAKE_PROFILE
}
\"
]; then
echo
\"
CMake profiling enabled - will save to /sgl-kernel/cmake-profile.json
\"
export CMAKE_ARGS=
\"
--profiling-output=/sgl-kernel/cmake-profile.json --profiling-format=google-trace
\"
fi
export NINJA_STATUS=
\"
[%f/%t %es]
\"
# Enable Ninja build profiling if requested
if [ -n
\"
${
ENABLE_BUILD_PROFILE
}
\"
]; then
echo
\"
Ninja build profiling enabled - will save to /sgl-kernel/build-trace.json
\"
fi
PYTHONPATH=
${
PYTHON_ROOT_PATH
}
/lib/python
${
PYTHON_VERSION
}
/site-packages
${
PYTHON_ROOT_PATH
}
/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation &&
\
./rename_wheels.sh
# Show profile location if profiling was enabled
if [ -n
\"
${
ENABLE_CMAKE_PROFILE
}
\"
] && [ -f /sgl-kernel/cmake-profile.json ]; then
echo
\"\"
echo
\"
==================================
\"
echo
\"
CMake Profile Generated
\"
echo
\"
==================================
\"
echo
\"
Profile saved to: cmake-profile.json
\"
echo
\"
View in browser: chrome://tracing or edge://tracing
\"
echo
\"\"
fi
# Generate Ninja build trace if profiling enabled
if [ -n
\"
${
ENABLE_BUILD_PROFILE
}
\"
] && [ -f /sgl-kernel/build/.ninja_log ]; then
echo
\"\"
echo
\"
==================================
\"
echo
\"
Generating Ninja Build Trace
\"
echo
\"
==================================
\"
# Download ninjatracing script from GitHub (using PR #39 branch for ninja log v7 support)
wget -q https://raw.githubusercontent.com/cradleapps/ninjatracing/084212eaf68f25c70579958a2ed67fb4ec2a9ca4/ninjatracing -O /tmp/ninjatracing || echo
\"
Note: Failed to download ninjatracing, skipping build trace
\"
# Convert .ninja_log to Chrome trace (JSON format)
if [ -f /tmp/ninjatracing ]; then
${
PYTHON_ROOT_PATH
}
/bin/python /tmp/ninjatracing /sgl-kernel/build/.ninja_log > /sgl-kernel/build-trace.json || true
if [ -f /sgl-kernel/build-trace.json ]; then
# Compress the trace for smaller file size and faster loading
gzip -9 -k /sgl-kernel/build-trace.json 2>/dev/null || true
echo
\"
Build trace saved to: build-trace.json
\"
if [ -f /sgl-kernel/build-trace.json.gz ]; then
ORIGINAL_SIZE=
\$
(stat -f%z /sgl-kernel/build-trace.json 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json)
COMPRESSED_SIZE=
\$
(stat -f%z /sgl-kernel/build-trace.json.gz 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json.gz)
echo
\"
Compressed to: build-trace.json.gz (
\$
{RATIO}% smaller)
\"
fi
echo
\"\"
echo
\"
View in browser:
\"
echo
\"
- chrome://tracing (load JSON file)
\"
echo
\"
- ui.perfetto.dev (recommended, supports .gz files)
\"
echo
\"\"
echo
\"
Shows:
\"
echo
\"
- Compilation time per file
\"
echo
\"
- Parallelism utilization
\"
echo
\"
- Critical path (longest dependency chain)
\"
echo
\"
- Where the 2-hour build time went
\"
fi
fi
echo
\"\"
fi
# Show ccache statistics after build
echo
\"\"
echo
\"
==================================
\"
echo
\"
ccache Statistics
\"
echo
\"
==================================
\"
ccache -s
echo
\"\"
"
sgl-kernel/kernel-runner-setup.sh
0 → 100755
View file @
27a223ab
#!/bin/bash
#
# kernel-runner-setup.sh - provision an Ubuntu host as an sgl-kernel build runner.
#
# Usage:
#   kernel-runner-setup.sh [CUDA_VERSIONS]
#
# Arguments:
#   CUDA_VERSIONS  Comma-separated CUDA versions (default: "12-8,12-9").
#                  Entries may use '-' or '.' as the separator ("12-8" or
#                  "12.8"); surrounding whitespace is ignored.
#
# Steps performed, in order:
#   1. Install Docker from Docker's apt repository and add the current user
#      to the "docker" group.
#   2. Pull the PyTorch manylinux builder image for each CUDA version.
#   3. Install NVIDIA's cuda-keyring package.
#   4. apt-install cuda-toolkit-<version> for each CUDA version.
#
# Image pulls and toolkit installs are best-effort: a failure is reported and
# the script carries on with the next version. Every other step aborts the
# script on failure. Requires sudo, apt, dpkg and wget.
set -euo pipefail

readonly DEFAULT_CUDA_VERSIONS="12-8,12-9"
readonly RULE="==================================="

# Scratch directory for the downloaded keyring package; removed by cleanup().
KEYRING_TMPDIR=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a three-line section banner with the given title.
banner() {
  echo "${RULE}"
  echo "$1"
  echo "${RULE}"
}

# EXIT handler: remove the keyring scratch directory if one is still around.
cleanup() {
  if [[ -n "${KEYRING_TMPDIR}" ]]; then
    rm -rf -- "${KEYRING_TMPDIR}"
  fi
}
trap cleanup EXIT

#######################################
# Split the comma-separated version list into CUDA_VERSION_ARRAY, normalised
# to the dashed form ("12-8"). Empty or malformed entries are skipped with a
# warning so they can never reach `docker pull` / `apt-get install`.
# Globals:   CUDA_VERSION_ARRAY (written)
# Arguments: $1 - raw comma-separated version list
#######################################
parse_cuda_versions() {
  local raw_list=$1
  local -r version_re='^[0-9]+([-.][0-9]+)*$'
  local -a fields=()
  local field

  CUDA_VERSION_ARRAY=()
  IFS=',' read -ra fields <<<"${raw_list}"

  if ((${#fields[@]} > 0)); then
    for field in "${fields[@]}"; do
      # Trim leading and trailing whitespace.
      field="${field#"${field%%[![:space:]]*}"}"
      field="${field%"${field##*[![:space:]]}"}"
      if [[ -z "${field}" ]]; then
        continue
      fi
      if [[ ! "${field}" =~ ${version_re} ]]; then
        printf 'Warning: ignoring invalid CUDA version "%s"\n' "${field}" >&2
        continue
      fi
      CUDA_VERSION_ARRAY+=("${field//./-}")
    done
  fi

  if ((${#CUDA_VERSION_ARRAY[@]} == 0)); then
    die "No valid CUDA versions in \"${raw_list}\" (expected e.g. ${DEFAULT_CUDA_VERSIONS})"
  fi
}

#######################################
# Map the machine architecture to the builder image name and to the
# architecture directory used in NVIDIA's repository URL.
# Globals: ARCH, BUILDER_NAME, CUDA_ARCH (written)
#######################################
detect_architecture() {
  ARCH=$(uname -m)
  case "${ARCH}" in
    x86_64)
      BUILDER_NAME="pytorch/manylinux2_28-builder"
      CUDA_ARCH="x86_64"
      ;;
    aarch64)
      BUILDER_NAME="pytorch/manylinuxaarch64-builder"
      CUDA_ARCH="sbsa"
      ;;
    *)
      die "Unsupported architecture: ${ARCH}"
      ;;
  esac
}

#######################################
# Determine the Ubuntu release with the dot removed (e.g. "22.04" -> "2204"),
# preferring lsb_release and falling back to /etc/os-release.
# Globals: UBUNTU_VERSION (written)
#######################################
detect_ubuntu_version() {
  if command -v lsb_release &>/dev/null; then
    UBUNTU_VERSION=$(lsb_release -rs)
  else
    # shellcheck disable=SC1091
    UBUNTU_VERSION=$(. /etc/os-release && printf '%s' "${VERSION_ID:-}")
  fi
  UBUNTU_VERSION=${UBUNTU_VERSION//./}
  [[ -n "${UBUNTU_VERSION}" ]] || die "Could not determine the Ubuntu version"
}

# Install Docker from Docker's apt repository and add the user to its group.
install_docker() {
  local apt_arch codename

  banner "Installing Docker..."

  # Add Docker's official GPG key:
  sudo apt-get update
  sudo apt-get install -y ca-certificates curl
  sudo install -m 0755 -d /etc/apt/keyrings
  sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
    -o /etc/apt/keyrings/docker.asc
  sudo chmod a+r /etc/apt/keyrings/docker.asc

  # Add the repository to Apt sources:
  apt_arch=$(dpkg --print-architecture)
  # shellcheck disable=SC1091
  codename=$(. /etc/os-release \
    && printf '%s' "${UBUNTU_CODENAME:-${VERSION_CODENAME:-}}")
  [[ -n "${codename}" ]] || die "Could not determine the Ubuntu codename"
  echo "deb [arch=${apt_arch} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu ${codename} stable" \
    | sudo tee /etc/apt/sources.list.d/docker.list >/dev/null

  sudo apt-get update
  sudo apt-get install -y docker-ce docker-ce-cli containerd.io \
    docker-buildx-plugin docker-compose-plugin

  # Add current user to docker group (USER may be unset in minimal shells).
  sudo usermod -aG docker "${USER:-$(id -un)}"

  echo "Docker installed successfully!"
  echo "Note: You need to log out and log back in for docker group membership to take effect"
  echo ""
}

#######################################
# Pull the builder image for every requested CUDA version (best-effort) and
# report honestly whether all pulls succeeded.
# Globals: ARCH, BUILDER_NAME, CUDA_VERSION_ARRAY (read)
#######################################
pull_builder_images() {
  local version image
  local failed=0

  banner "Pulling Docker Images..."
  echo "Architecture: ${ARCH}"
  echo "Builder: ${BUILDER_NAME}"

  for version in "${CUDA_VERSION_ARRAY[@]}"; do
    # Image tags use the dotted form: 12-8 -> 12.8
    image="${BUILDER_NAME}:cuda${version//-/.}"

    echo ""
    echo "Pulling ${image}..."
    # Run the pull via `sg docker` because the user was only just added to the
    # docker group. The image name is safe to interpolate into the command
    # string: BUILDER_NAME is a constant and the version has been validated.
    if sg docker -c "docker pull ${image}"; then
      echo "✓ Successfully pulled ${image}"
    else
      echo "✗ Failed to pull ${image}"
      echo "  You may need to log out and log back in for docker group to take effect"
      failed=$((failed + 1))
    fi
  done

  echo ""
  if ((failed == 0)); then
    echo "Docker images pulled successfully!"
  else
    printf 'Warning: %d of %d Docker image pull(s) failed\n' \
      "${failed}" "${#CUDA_VERSION_ARRAY[@]}" >&2
  fi
  echo ""
}

# Print the detected OS release and architectures.
print_system_info() {
  banner "System Information:"
  echo "Ubuntu Version: ${UBUNTU_VERSION}"
  echo "Architecture: ${ARCH}"
  echo "CUDA Architecture: ${CUDA_ARCH}"
  echo ""
}

#######################################
# Download and install NVIDIA's cuda-keyring package (only needed once),
# then refresh the apt package index.
# Globals: UBUNTU_VERSION, CUDA_ARCH (read), KEYRING_TMPDIR (written)
#######################################
install_cuda_keyring() {
  local keyring_url keyring_deb

  banner "Installing CUDA keyring..."

  keyring_url="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CUDA_ARCH}/cuda-keyring_1.1-1_all.deb"

  # Download into a private scratch directory instead of the current working
  # directory; cleanup() removes it even if dpkg or apt-get fails.
  KEYRING_TMPDIR=$(mktemp -d)
  keyring_deb="${KEYRING_TMPDIR}/cuda-keyring.deb"

  wget -q "${keyring_url}" -O "${keyring_deb}"
  sudo dpkg -i "${keyring_deb}"
  sudo apt-get update

  rm -rf -- "${KEYRING_TMPDIR}"
  KEYRING_TMPDIR=""

  echo "CUDA keyring installed successfully!"
  echo ""
}

#######################################
# apt-install cuda-toolkit-<version> for every requested version
# (best-effort: failures are reported and the loop continues).
# Globals:   CUDA_VERSION_ARRAY (read)
# Arguments: $1 - the version list as given by the user (display only)
#######################################
install_cuda_toolkits() {
  local requested=$1
  local version
  local failed=0

  banner "Installing CUDA Toolkits..."
  echo "Versions to install: ${requested}"
  echo ""

  for version in "${CUDA_VERSION_ARRAY[@]}"; do
    echo "-----------------------------------"
    echo "Installing CUDA Toolkit ${version}..."
    echo "-----------------------------------"

    if sudo apt-get install -y "cuda-toolkit-${version}"; then
      echo "✓ CUDA Toolkit ${version} installed successfully!"
    else
      echo "✗ Failed to install CUDA Toolkit ${version}"
      echo "  This might be due to an invalid version or repository issue"
      failed=$((failed + 1))
    fi
    echo ""
  done

  if ((failed > 0)); then
    printf 'Warning: %d of %d CUDA Toolkit install(s) failed\n' \
      "${failed}" "${#CUDA_VERSION_ARRAY[@]}" >&2
  fi
}

# List the CUDA installations present under /usr/local.
print_summary() {
  banner "Installation Summary"
  echo "Installed CUDA versions:"
  # Display only; ls exits non-zero when the glob matches nothing.
  ls -d /usr/local/cuda-* 2>/dev/null \
    || echo "No CUDA installations found in /usr/local/"
  echo ""
  echo "Setup complete!"
}

main() {
  local cuda_versions="${1:-${DEFAULT_CUDA_VERSIONS}}"

  # Validate the input and the platform before changing anything on the host.
  parse_cuda_versions "${cuda_versions}"
  detect_architecture

  install_docker
  pull_builder_images

  detect_ubuntu_version
  print_system_info
  install_cuda_keyring
  install_cuda_toolkits "${cuda_versions}"
  print_summary
}

main "$@"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment