#!/bin/bash
# Build the sgl-kernel wheel inside a PyTorch manylinux builder container.
#
# Usage: build.sh <python-version> <cuda-version> [arch]
#   python-version  dotted CPython version; the dots are stripped to form the
#                   /opt/python/cpXY-cpXY interpreter directory
#   cuda-version    CUDA toolkit version, used for the builder image tag and
#                   the torch wheel index
#   arch            optional target architecture; detected from the host
#                   when omitted
set -ex

PYTHON_VERSION=$1
CUDA_VERSION=$2
# Root of the matching CPython installation inside the builder image.
PYTHON_ROOT_PATH=/opt/python/cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}

# Target architecture: the third positional argument when given, otherwise
# the architecture reported by the build host.
if [ -z "${3:-}" ]; then
   # `uname -i` is non-portable and prints "unknown" when the hardware
   # platform cannot be determined; fall back to the machine name so ARCH
   # is always usable in image and tarball names.
   ARCH=$(uname -i 2>/dev/null || true)
   if [ -z "${ARCH}" ] || [ "${ARCH}" = "unknown" ]; then
      ARCH=$(uname -m)
   fi
else
   ARCH=$3
fi

echo "ARCH:  $ARCH"

# Pick the builder image repository and the architecture name used in the
# CUDA targets/<arch>-linux directory (CUDA calls aarch64 servers "sbsa").
# ARCH is quoted so an empty value compares cleanly instead of making `[`
# fail with a syntax error.
if [ "${ARCH}" = "aarch64" ]; then
   LIBCUDA_ARCH="sbsa"
   BUILDER_NAME="pytorch/manylinuxaarch64-builder"
else
   LIBCUDA_ARCH="${ARCH}"
   BUILDER_NAME="pytorch/manylinux2_28-builder"
fi

# The builder image tag is the same for every CUDA version; only the torch
# release and wheel index differ.
DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION}"

# Map the CUDA version to the torch release and PyTorch wheel index.
# Any version not listed falls back to the cu126 index.
case "${CUDA_VERSION}" in
   13.0)
      torch_version="2.9.0"
      torch_index="cu130"
      ;;
   12.9)
      torch_version="2.8.0"
      torch_index="cu129"
      ;;
   12.8)
      torch_version="2.8.0"
      torch_index="cu128"
      ;;
   *)
      torch_version="2.8.0"
      torch_index="cu126"
      ;;
esac

# Command suffix that is later run as ${PYTHON_ROOT_PATH}/bin/${TORCH_INSTALL}.
TORCH_INSTALL="pip install --no-cache-dir torch==${torch_version} --index-url https://download.pytorch.org/whl/${torch_index}"

# Host-side caches that are bind-mounted into the build container.
# They live under the home directory so they survive workspace
# cleanups and fresh checkouts.
CACHE_DIR="${HOME}/.cache/sgl-kernel"
CMAKE_DOWNLOAD_CACHE="${CACHE_DIR}/cmake-downloads"
CCACHE_DIR="${CACHE_DIR}/ccache"

mkdir -p "${CMAKE_DOWNLOAD_CACHE}" "${CCACHE_DIR}"

# Report the cache locations in the build log.
cat <<EOF
===================================
Cache Configuration
===================================
CMake download cache: ${CMAKE_DOWNLOAD_CACHE}
ccache directory: ${CCACHE_DIR}

EOF

# Run the whole build inside the builder image.
#
# Mounts: the current directory at /sgl-kernel, the CMake download cache at
# /cmake-downloads and the ccache directory at /ccache.
# ENABLE_CMAKE_PROFILE / ENABLE_BUILD_PROFILE are forwarded into the container.
#
# The script below is a double-quoted string: unescaped ${VAR} is expanded by
# this host shell before the container starts, while \$ and \" are passed
# through and evaluated by the shell inside the container. Comments inside
# the string must therefore avoid dollar signs, double quotes and backticks.
#
# The in-container script relies on `set -e` with one command per statement;
# chaining with && would hide failures of every command but the last one.
docker run --rm \
   -v "$(pwd)":/sgl-kernel \
   -v "${CMAKE_DOWNLOAD_CACHE}":/cmake-downloads \
   -v "${CCACHE_DIR}":/ccache \
   -e ENABLE_CMAKE_PROFILE="${ENABLE_CMAKE_PROFILE:-}" \
   -e ENABLE_BUILD_PROFILE="${ENABLE_BUILD_PROFILE:-}" \
   "${DOCKER_IMAGE}" \
   bash -c "
   set -e
   # Install CMake (version >= 3.26) - Robust Installation with caching
   echo \"==================================\"
   echo \"Installing CMake\"
   echo \"==================================\"
   export CMAKE_VERSION_MAJOR=3.31
   export CMAKE_VERSION_MINOR=1

   # Default parallelism: one third of the cores visible in the container,
   # capped at 48 and never below 1.
   BUILD_JOBS=\$(( \$(nproc) / 3 ))
   if [ \"\${BUILD_JOBS}\" -gt 48 ]; then
      BUILD_JOBS=48
   fi
   if [ \"\${BUILD_JOBS}\" -lt 1 ]; then
      BUILD_JOBS=1
   fi
   export CMAKE_BUILD_PARALLEL_LEVEL=\${BUILD_JOBS}

   # Setting these flags to reduce OOM chance only on ARM
   if [ \"${ARCH}\" = \"aarch64\" ]; then
      export CUDA_NVCC_FLAGS=\"-Xcudafe --threads=2\"
      export MAKEFLAGS='-j2'
      export CMAKE_BUILD_PARALLEL_LEVEL=2
      export NINJAFLAGS='-j2'
      echo \"ARM detected: Using extra conservative settings (2 parallel jobs)\"
   fi

   CMAKE_TARBALL=\"cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\"

   # Check if CMake is already cached
   if [ -f \"/cmake-downloads/\${CMAKE_TARBALL}\" ]; then
      echo \"Using cached CMake from /cmake-downloads/\${CMAKE_TARBALL}\"
      cp /cmake-downloads/\${CMAKE_TARBALL} .
   else
      echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL}\"
      wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL}
      # Cache the downloaded file
      cp \${CMAKE_TARBALL} /cmake-downloads/
   fi

   tar -xzf \${CMAKE_TARBALL}
   mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake
   export PATH=/opt/cmake/bin:\$PATH
   export LD_LIBRARY_PATH=/lib64:\$LD_LIBRARY_PATH

   # Debugging CMake
   echo \"PATH: \$PATH\"
   which cmake
   cmake --version

   echo \"==================================\"
   echo \"Installing and configuring ccache\"
   echo \"==================================\"

   # Install ccache 4.12.1 from source for CUDA support (yum provides old 3.7.7)
   echo \"Installing ccache 4.12.1 from source...\"

   # Install build dependencies
   yum install -y gcc gcc-c++ make wget tar

   # Download and build ccache 4.12.1
   cd /tmp
   wget -q https://github.com/ccache/ccache/releases/download/v4.12.1/ccache-4.12.1.tar.xz
   tar -xf ccache-4.12.1.tar.xz
   cd ccache-4.12.1

   # Build and install (uses already-installed CMake 3.31)
   mkdir build && cd build
   /opt/cmake/bin/cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr .. >/dev/null
   make -j\$(nproc) >/dev/null
   make install >/dev/null

   # Verify installation
   ccache --version
   echo \"ccache 4.12.1 installed successfully\"
   cd /sgl-kernel

   # Configure ccache
   export CCACHE_DIR=/ccache
   export CCACHE_BASEDIR=/sgl-kernel
   export CCACHE_MAXSIZE=10G
   export CCACHE_COMPILERCHECK=content
   export CCACHE_COMPRESS=true
   export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime

   # Set up ccache as compiler launcher (don't use PATH to avoid -ccbin conflicts)
   export CMAKE_C_COMPILER_LAUNCHER=ccache
   export CMAKE_CXX_COMPILER_LAUNCHER=ccache
   export CMAKE_CUDA_COMPILER_LAUNCHER=ccache

   # Show ccache stats before build
   ccache -sV || true
   echo \"\"

   # System libraries and Python build dependencies.
   # One command per statement so that set -e aborts on the first failure.
   yum install numactl-devel -y
   yum install libibverbs -y --nogpgcheck
   ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so
   ${PYTHON_ROOT_PATH}/bin/${TORCH_INSTALL}
   ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core
   export TORCH_CUDA_ARCH_LIST='8.0 8.9 9.0+PTX'
   export CUDA_VERSION=${CUDA_VERSION}

   # Expose the libcuda stub from the CUDA toolkit under /usr/lib/<arch>-linux-gnu
   mkdir -p /usr/lib/${ARCH}-linux-gnu/
   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so

   # Prepend the CCCL headers to whatever include paths the container already has
   export CPLUS_INCLUDE_PATH=/usr/local/cuda/include/cccl\${CPLUS_INCLUDE_PATH:+:\${CPLUS_INCLUDE_PATH}}
   export C_INCLUDE_PATH=/usr/local/cuda/include/cccl\${C_INCLUDE_PATH:+:\${C_INCLUDE_PATH}}

   cd /sgl-kernel
   ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/

   # Enable CMake profiling if requested
   if [ -n \"${ENABLE_CMAKE_PROFILE}\" ]; then
      echo \"CMake profiling enabled - will save to /sgl-kernel/cmake-profile.json\"
      export CMAKE_ARGS=\"--profiling-output=/sgl-kernel/cmake-profile.json --profiling-format=google-trace\"
   fi

   export NINJA_STATUS=\"[%f/%t %es] \"
   # Enable Ninja build profiling if requested
   if [ -n \"${ENABLE_BUILD_PROFILE}\" ]; then
      echo \"Ninja build profiling enabled - will save to /sgl-kernel/build-trace.json\"
   fi

   # Build the wheel, then rename it; a failure in either step aborts the script
   PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation
   ./rename_wheels.sh

   # Show profile location if profiling was enabled
   if [ -n \"${ENABLE_CMAKE_PROFILE}\" ] && [ -f /sgl-kernel/cmake-profile.json ]; then
      echo \"\"
      echo \"==================================\"
      echo \"CMake Profile Generated\"
      echo \"==================================\"
      echo \"Profile saved to: cmake-profile.json\"
      echo \"View in browser: chrome://tracing or edge://tracing\"
      echo \"\"
   fi

   # Generate Ninja build trace if profiling enabled
   if [ -n \"${ENABLE_BUILD_PROFILE}\" ] && [ -f /sgl-kernel/build/.ninja_log ]; then
      echo \"\"
      echo \"==================================\"
      echo \"Generating Ninja Build Trace\"
      echo \"==================================\"

      # Download ninjatracing script from GitHub (using PR #39 branch for ninja log v7 support)
      # wget -O leaves an empty file behind on failure, so remove it to make
      # the existence check below meaningful. Tracing is best-effort.
      if ! wget -q https://raw.githubusercontent.com/cradleapps/ninjatracing/084212eaf68f25c70579958a2ed67fb4ec2a9ca4/ninjatracing -O /tmp/ninjatracing; then
         echo \"Note: Failed to download ninjatracing, skipping build trace\"
         rm -f /tmp/ninjatracing
      fi

      # Convert .ninja_log to Chrome trace (JSON format)
      if [ -f /tmp/ninjatracing ]; then
         ${PYTHON_ROOT_PATH}/bin/python /tmp/ninjatracing /sgl-kernel/build/.ninja_log > /sgl-kernel/build-trace.json || true

         if [ -f /sgl-kernel/build-trace.json ]; then
            # Compress the trace for smaller file size and faster loading
            gzip -9 -k /sgl-kernel/build-trace.json 2>/dev/null || true

            echo \"Build trace saved to: build-trace.json\"
            if [ -f /sgl-kernel/build-trace.json.gz ]; then
               ORIGINAL_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json)
               COMPRESSED_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json.gz 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json.gz)
               # Integer percentage saved; guarded against an empty trace file
               RATIO=\$(( ORIGINAL_SIZE > 0 ? 100 - COMPRESSED_SIZE * 100 / ORIGINAL_SIZE : 0 ))
               echo \"Compressed to: build-trace.json.gz (\${RATIO}% smaller)\"
            fi
            echo \"\"
            echo \"View in browser:\"
            echo \"  - chrome://tracing (load JSON file)\"
            echo \"  - ui.perfetto.dev (recommended, supports .gz files)\"
            echo \"\"
            echo \"Shows:\"
            echo \"  - Compilation time per file\"
            echo \"  - Parallelism utilization\"
            echo \"  - Critical path (longest dependency chain)\"
            echo \"  - Where the 2-hour build time went\"
         fi
      fi
      echo \"\"
   fi

   # Show ccache statistics after build
   echo \"\"
   echo \"==================================\"
   echo \"ccache Statistics\"
   echo \"==================================\"
   ccache -s
   echo \"\"
   "