Unverified Commit 27a223ab authored by Kangyan-Zhou's avatar Kangyan-Zhou Committed by GitHub
Browse files

Improve Kernel Build Time (#11508)

parent 53529f46
...@@ -31,11 +31,35 @@ else ...@@ -31,11 +31,35 @@ else
TORCH_INSTALL="pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu126" TORCH_INSTALL="pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu126"
fi fi
# Create cache directories for persistent build artifacts in home directory
# Using home directory to persist across workspace cleanups/checkouts
CACHE_DIR="${HOME}/.cache/sgl-kernel"
CMAKE_DOWNLOAD_CACHE="${CACHE_DIR}/cmake-downloads"
CCACHE_DIR="${CACHE_DIR}/ccache"
mkdir -p "${CMAKE_DOWNLOAD_CACHE}"
mkdir -p "${CCACHE_DIR}"
echo "==================================="
echo "Cache Configuration"
echo "==================================="
echo "CMake download cache: ${CMAKE_DOWNLOAD_CACHE}"
echo "ccache directory: ${CCACHE_DIR}"
echo ""
docker run --rm \ docker run --rm \
-v $(pwd):/sgl-kernel \ -v $(pwd):/sgl-kernel \
-v ${CMAKE_DOWNLOAD_CACHE}:/cmake-downloads \
-v ${CCACHE_DIR}:/ccache \
-e ENABLE_CMAKE_PROFILE="${ENABLE_CMAKE_PROFILE:-}" \
-e ENABLE_BUILD_PROFILE="${ENABLE_BUILD_PROFILE:-}" \
${DOCKER_IMAGE} \ ${DOCKER_IMAGE} \
bash -c " bash -c "
# Install CMake (version >= 3.26) - Robust Installation set -e
# Install CMake (version >= 3.26) - Robust Installation with caching
echo \"==================================\"
echo \"Installing CMake\"
echo \"==================================\"
export CMAKE_VERSION_MAJOR=3.31 export CMAKE_VERSION_MAJOR=3.31
export CMAKE_VERSION_MINOR=1 export CMAKE_VERSION_MINOR=1
# Setting these flags to reduce OOM chance only on ARM # Setting these flags to reduce OOM chance only on ARM
...@@ -45,10 +69,23 @@ docker run --rm \ ...@@ -45,10 +69,23 @@ docker run --rm \
export MAKEFLAGS='-j2' export MAKEFLAGS='-j2'
export CMAKE_BUILD_PARALLEL_LEVEL=2 export CMAKE_BUILD_PARALLEL_LEVEL=2
export NINJAFLAGS='-j2' export NINJAFLAGS='-j2'
echo \"ARM detected: Using extra conservative settings (2 parallel jobs)\"
fi
CMAKE_TARBALL=\"cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\"
# Check if CMake is already cached
if [ -f \"/cmake-downloads/\${CMAKE_TARBALL}\" ]; then
echo \"Using cached CMake from /cmake-downloads/\${CMAKE_TARBALL}\"
cp /cmake-downloads/\${CMAKE_TARBALL} .
else
echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL}\"
wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL}
# Cache the downloaded file
cp \${CMAKE_TARBALL} /cmake-downloads/
fi fi
echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\"
wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz tar -xzf \${CMAKE_TARBALL}
tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz
mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake
export PATH=/opt/cmake/bin:\$PATH export PATH=/opt/cmake/bin:\$PATH
export LD_LIBRARY_PATH=/lib64:\$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/lib64:\$LD_LIBRARY_PATH
...@@ -58,6 +95,50 @@ docker run --rm \ ...@@ -58,6 +95,50 @@ docker run --rm \
which cmake which cmake
cmake --version cmake --version
echo \"==================================\"
echo \"Installing and configuring ccache\"
echo \"==================================\"
# Install ccache 4.12.1 from source for CUDA support (yum provides old 3.7.7)
echo \"Installing ccache 4.12.1 from source...\"
# Install build dependencies
yum install -y gcc gcc-c++ make wget tar
# Download and build ccache 4.12.1
cd /tmp
wget -q https://github.com/ccache/ccache/releases/download/v4.12.1/ccache-4.12.1.tar.xz
tar -xf ccache-4.12.1.tar.xz
cd ccache-4.12.1
# Build and install (uses already-installed CMake 3.31)
mkdir build && cd build
/opt/cmake/bin/cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr .. >/dev/null
make -j\$(nproc) >/dev/null
make install >/dev/null
# Verify installation
ccache --version
echo \"ccache 4.12.1 installed successfully\"
cd /sgl-kernel
# Configure ccache
export CCACHE_DIR=/ccache
export CCACHE_BASEDIR=/sgl-kernel
export CCACHE_MAXSIZE=10G
export CCACHE_COMPILERCHECK=content
export CCACHE_COMPRESS=true
export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime
# Set up ccache as compiler launcher (don't use PATH to avoid -ccbin conflicts)
export CMAKE_C_COMPILER_LAUNCHER=ccache
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export CMAKE_CUDA_COMPILER_LAUNCHER=ccache
# Show ccache stats before build
ccache -sV || true
echo \"\"
yum install numactl-devel -y && \ yum install numactl-devel -y && \
yum install libibverbs -y --nogpgcheck && \ yum install libibverbs -y --nogpgcheck && \
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \ ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \
...@@ -70,6 +151,77 @@ docker run --rm \ ...@@ -70,6 +151,77 @@ docker run --rm \
cd /sgl-kernel && \ cd /sgl-kernel && \
ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \ ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \
# Enable CMake profiling if requested
if [ -n \"${ENABLE_CMAKE_PROFILE}\" ]; then
echo \"CMake profiling enabled - will save to /sgl-kernel/cmake-profile.json\"
export CMAKE_ARGS=\"--profiling-output=/sgl-kernel/cmake-profile.json --profiling-format=google-trace\"
fi
export NINJA_STATUS=\"[%f/%t %es] \"
# Enable Ninja build profiling if requested
if [ -n \"${ENABLE_BUILD_PROFILE}\" ]; then
echo \"Ninja build profiling enabled - will save to /sgl-kernel/build-trace.json\"
fi
PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation && \ PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation && \
./rename_wheels.sh ./rename_wheels.sh
# Show profile location if profiling was enabled
if [ -n \"${ENABLE_CMAKE_PROFILE}\" ] && [ -f /sgl-kernel/cmake-profile.json ]; then
echo \"\"
echo \"==================================\"
echo \"CMake Profile Generated\"
echo \"==================================\"
echo \"Profile saved to: cmake-profile.json\"
echo \"View in browser: chrome://tracing or edge://tracing\"
echo \"\"
fi
# Generate Ninja build trace if profiling enabled
if [ -n \"${ENABLE_BUILD_PROFILE}\" ] && [ -f /sgl-kernel/build/.ninja_log ]; then
echo \"\"
echo \"==================================\"
echo \"Generating Ninja Build Trace\"
echo \"==================================\"
# Download ninjatracing script from GitHub (using PR #39 branch for ninja log v7 support)
wget -q https://raw.githubusercontent.com/cradleapps/ninjatracing/084212eaf68f25c70579958a2ed67fb4ec2a9ca4/ninjatracing -O /tmp/ninjatracing || echo \"Note: Failed to download ninjatracing, skipping build trace\"
# Convert .ninja_log to Chrome trace (JSON format)
if [ -f /tmp/ninjatracing ]; then
${PYTHON_ROOT_PATH}/bin/python /tmp/ninjatracing /sgl-kernel/build/.ninja_log > /sgl-kernel/build-trace.json || true
if [ -f /sgl-kernel/build-trace.json ]; then
# Compress the trace for smaller file size and faster loading
gzip -9 -k /sgl-kernel/build-trace.json 2>/dev/null || true
echo \"Build trace saved to: build-trace.json\"
if [ -f /sgl-kernel/build-trace.json.gz ]; then
# Byte sizes of the raw and gzipped traces. The BSD-style form (stat -f%z) is
# tried first and the GNU-style form (stat -c%s) is the fallback.
ORIGINAL_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json)
COMPRESSED_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json.gz 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json.gz)
# Percentage saved by compression (integer arithmetic). RATIO was previously
# printed without ever being assigned. A zero-byte trace keeps RATIO at 0 so
# the division below can never divide by zero.
RATIO=0
if [ \"\${ORIGINAL_SIZE:-0}\" -gt 0 ]; then
RATIO=\$(( (ORIGINAL_SIZE - COMPRESSED_SIZE) * 100 / ORIGINAL_SIZE ))
fi
echo \"Compressed to: build-trace.json.gz (\${RATIO}% smaller)\"
fi
echo \"\"
echo \"View in browser:\"
echo \" - chrome://tracing (load JSON file)\"
echo \" - ui.perfetto.dev (recommended, supports .gz files)\"
echo \"\"
echo \"Shows:\"
echo \" - Compilation time per file\"
echo \" - Parallelism utilization\"
echo \" - Critical path (longest dependency chain)\"
echo \" - Where the 2-hour build time went\"
fi
fi
echo \"\"
fi
# Show ccache statistics after build
echo \"\"
echo \"==================================\"
echo \"ccache Statistics\"
echo \"==================================\"
ccache -s
echo \"\"
" "
#!/bin/bash
# Provisions a build host: installs Docker from Docker's apt repository, pulls
# the builder images, and installs the requested CUDA toolkits.
#
# Usage: <script> [CUDA_VERSIONS]
#   CUDA_VERSIONS  Comma-separated CUDA versions in dash form.
#                  Defaults to "12-8,12-9".
set -e
CUDA_VERSIONS="${1:-12-8,12-9}"
echo "==================================="
echo "Installing Docker..."
echo "==================================="
# Add Docker's official GPG key:
sudo apt-get update
sudo apt-get install -y ca-certificates curl
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add the repository to Apt sources:
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# Add current user to docker group.
# USER is not guaranteed to be set in every environment; fall back to the
# effective user name so usermod always receives a login argument. Quoting
# keeps the value as a single argument.
sudo usermod -aG docker "${USER:-$(id -un)}"
echo "Docker installed successfully!"
echo "Note: You need to log out and log back in for docker group membership to take effect"
echo ""
# Map the machine hardware name to the matching builder image repository.
# Only x86_64 and aarch64 are handled; anything else aborts the script.
ARCH=$(uname -m)
case "$ARCH" in
  x86_64)
    BUILDER_NAME="pytorch/manylinux2_28-builder"
    ;;
  aarch64)
    BUILDER_NAME="pytorch/manylinuxaarch64-builder"
    ;;
  *)
    echo "Unsupported architecture: $ARCH"
    exit 1
    ;;
esac
# Pull Docker images for the specified CUDA versions.
# Reads CUDA_VERSIONS (comma-separated, dash form), BUILDER_NAME and ARCH.
# A failed pull is reported but does not abort the script; the final message
# reflects whether every pull actually succeeded.
echo "==================================="
echo "Pulling Docker Images..."
echo "==================================="
echo "Architecture: ${ARCH}"
echo "Builder: ${BUILDER_NAME}"
# Parse CUDA versions and pull corresponding Docker images
IFS=',' read -ra CUDA_VERSION_ARRAY <<< "$CUDA_VERSIONS"
# Images whose pull failed, collected for the closing summary.
FAILED_IMAGES=()
# Convert CUDA versions from format "12-8" to "12.8" and pull images
for CUDA_VERSION in "${CUDA_VERSION_ARRAY[@]}"; do
  # Trim whitespace
  CUDA_VERSION=$(echo "$CUDA_VERSION" | xargs)
  # Skip empty entries (e.g. "12-8,,12-9") instead of pulling a bogus ":cuda" tag
  if [ -z "$CUDA_VERSION" ]; then
    continue
  fi
  # Convert format: 12-8 -> 12.8
  CUDA_VERSION_DOTTED="${CUDA_VERSION//-/.}"
  DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION_DOTTED}"
  echo ""
  echo "Pulling ${DOCKER_IMAGE}..."
  # Run the pull through sg so it executes with the docker group, since the
  # user was only just added to that group.
  if sg docker -c "docker pull ${DOCKER_IMAGE}"; then
    echo "✓ Successfully pulled ${DOCKER_IMAGE}"
  else
    echo "✗ Failed to pull ${DOCKER_IMAGE}"
    echo " You may need to log out and log back in for docker group to take effect"
    FAILED_IMAGES+=("${DOCKER_IMAGE}")
  fi
done
echo ""
if [ "${#FAILED_IMAGES[@]}" -eq 0 ]; then
  echo "Docker images pulled successfully!"
else
  echo "Warning: failed to pull ${#FAILED_IMAGES[@]} image(s): ${FAILED_IMAGES[*]}"
fi
echo ""
# Determine the Ubuntu release with the dots stripped (e.g. "22.04" -> "2204").
if ! command -v lsb_release > /dev/null 2>&1; then
  # lsb_release is not available: read VERSION_ID from /etc/os-release instead.
  UBUNTU_VERSION=$(. /etc/os-release && echo $VERSION_ID | tr -d '.')
else
  UBUNTU_VERSION=$(lsb_release -rs | tr -d '.')
fi
# Translate the machine architecture (ARCH, detected earlier for the Docker
# images) into the directory name used in the CUDA repository URL.
case "$ARCH" in
  x86_64)
    CUDA_ARCH="x86_64"
    ;;
  aarch64)
    CUDA_ARCH="sbsa"
    ;;
  *)
    echo "Unsupported architecture: $ARCH"
    exit 1
    ;;
esac
# Print the detected system parameters, one per line, followed by a blank line.
printf '%s\n' \
  "===================================" \
  "System Information:" \
  "===================================" \
  "Ubuntu Version: ${UBUNTU_VERSION}" \
  "Architecture: ${ARCH}" \
  "CUDA Architecture: ${CUDA_ARCH}" \
  ""
# Install CUDA keyring (only need to do this once).
# Reads UBUNTU_VERSION and CUDA_ARCH to build the repository URL. The package
# is downloaded to a private temporary file, which is removed on every exit
# path of this section.
echo "==================================="
echo "Installing CUDA keyring..."
echo "==================================="
KEYRING_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CUDA_ARCH}/cuda-keyring_1.1-1_all.deb"
KEYRING_DEB=$(mktemp "${TMPDIR:-/tmp}/cuda-keyring.XXXXXX")
# wget runs quietly, so report the URL explicitly on failure; otherwise the
# script would stop under `set -e` without any message.
if ! wget -q "${KEYRING_URL}" -O "${KEYRING_DEB}"; then
  echo "✗ Failed to download CUDA keyring from ${KEYRING_URL}" >&2
  rm -f -- "${KEYRING_DEB}"
  exit 1
fi
if ! sudo dpkg -i "${KEYRING_DEB}"; then
  echo "✗ Failed to install CUDA keyring package" >&2
  rm -f -- "${KEYRING_DEB}"
  exit 1
fi
rm -f -- "${KEYRING_DEB}"
# Refresh package lists so the newly added CUDA repository becomes visible.
sudo apt-get update
echo "CUDA keyring installed successfully!"
echo ""
# Split CUDA versions and install each one.
# Reads CUDA_VERSIONS (comma-separated, dash form such as "12-8"). A failed
# install is reported and the loop continues with the next version.
IFS=',' read -ra CUDA_VERSION_ARRAY <<< "$CUDA_VERSIONS"
echo "==================================="
echo "Installing CUDA Toolkits..."
echo "==================================="
echo "Versions to install: ${CUDA_VERSIONS}"
echo ""
for CUDA_VERSION in "${CUDA_VERSION_ARRAY[@]}"; do
  # Trim whitespace
  CUDA_VERSION=$(echo "$CUDA_VERSION" | xargs)
  # Skip empty entries (e.g. "12-8,,12-9") rather than asking apt for "cuda-toolkit-"
  if [ -z "$CUDA_VERSION" ]; then
    continue
  fi
  echo "-----------------------------------"
  echo "Installing CUDA Toolkit ${CUDA_VERSION}..."
  echo "-----------------------------------"
  # Quote the package name so it is always passed to apt-get as one argument.
  if sudo apt-get install -y "cuda-toolkit-${CUDA_VERSION}"; then
    echo "✓ CUDA Toolkit ${CUDA_VERSION} installed successfully!"
  else
    echo "✗ Failed to install CUDA Toolkit ${CUDA_VERSION}"
    echo " This might be due to an invalid version or repository issue"
  fi
  echo ""
done
# Closing report: list the CUDA directories found under /usr/local, or say
# that none were found.
printf '%s\n' \
  "===================================" \
  "Installation Summary" \
  "===================================" \
  "Installed CUDA versions:"
if ! ls -d /usr/local/cuda-* 2>/dev/null; then
  echo "No CUDA installations found in /usr/local/"
fi
printf '%s\n' "" "Setup complete!"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment