#!/usr/bin/env bash
# This script is used to build FlashInfer wheels with AOT kernels.
#
# Required environment variables:
#   FLASHINFER_GIT_REF  ref handed to `git clone --branch` when fetching FlashInfer
#   CUDA_VERSION        CUDA version string (e.g. 12.8); selects the CUDA arch list
#                       and the uv torch backend
# Optional environment variables:
#   BUILD_WHEEL         "true" (default) builds a wheel into flashinfer-dist/;
#                       any other value installs FlashInfer directly instead

# -e: stop on the first failing command, -u: reject unset variables,
# -x: trace every command, pipefail: a pipeline fails if any stage fails.
set -euxo pipefail

# FlashInfer configuration
FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
BUILD_WHEEL="${BUILD_WHEEL:-true}"

# Fail early with a readable message when a required variable is missing.
# The `:-` default keeps the emptiness check itself from tripping `set -u`.
for required_var in FLASHINFER_GIT_REF CUDA_VERSION; do
    if [[ -z "${!required_var:-}" ]]; then
        echo "❌ ${required_var} must be specified" >&2
        exit 1
    fi
done

echo "🏗️  Building FlashInfer ${FLASHINFER_GIT_REF} for CUDA ${CUDA_VERSION}"

# Clone FlashInfer
# Shallow clone (depth 1) of the requested ref, including its submodules
# (also shallow), into ./flashinfer.
git clone --depth 1 --recursive --shallow-submodules \
    --branch "${FLASHINFER_GIT_REF}" \
    -- "${FLASHINFER_GIT_REPO}" flashinfer

# Set CUDA arch list based on CUDA version
# Exclude CUDA arches for older versions (11.x and 12.0-12.7)

# Print the space-separated CUDA arch list to build for a CUDA version.
# Arguments: $1 - CUDA version string, e.g. "12.8" or "12.8.1"
# Outputs:   the arch list on stdout
select_fi_cuda_arch_list() {
    local cuda_version="${1:-}"
    case "${cuda_version}" in
        11.*)
            printf '%s\n' "7.5 8.0 8.9"
            ;;
        12.[0-7]*)
            printf '%s\n' "7.5 8.0 8.9 9.0a"
            ;;
        12.[8-9]*)
            # CUDA 12.8–12.9
            printf '%s\n' "7.5 8.0 8.9 9.0a 10.0a 10.3a 12.0"
            ;;
        *)
            # CUDA 13.0+
            printf '%s\n' "7.5 8.0 8.9 9.0a 10.0f 12.0"
            ;;
    esac
}

FI_TORCH_CUDA_ARCH_LIST="$(select_fi_cuda_arch_list "${CUDA_VERSION:-}")"

echo "🏗️ Building FlashInfer AOT for arches: ${FI_TORCH_CUDA_ARCH_LIST}"

pushd flashinfer
    # Make sure the wheel is built for the correct CUDA version.
    # The backend name is "cu" + major + minor with the dot dropped
    # (e.g. 12.8.1 -> cu128); any further version components are ignored.
    # Splitting with `read` avoids a pipeline inside `export VAR=$(...)`,
    # which would hide a failure of the substitution.
    IFS=. read -r cuda_major cuda_minor _ <<< "${CUDA_VERSION}"
    export UV_TORCH_BACKEND="cu${cuda_major}${cuda_minor}"

    # Build AOT kernels
    # Both variables are exported with the same arch list before running
    # the flashinfer.aot module.
    export TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}"
    export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}"
    python3 -m flashinfer.aot

    if [[ "${BUILD_WHEEL}" == "true" ]]; then
        # Build wheel for distribution
        # The output goes one level up so it is not inside the checkout.
        uv build --no-build-isolation --wheel --out-dir ../flashinfer-dist .
        echo "✅ FlashInfer wheel built successfully in flashinfer-dist/"
    else
        # Install directly (for Dockerfile)
        uv pip install --system --no-build-isolation --force-reinstall .
        echo "✅ FlashInfer installed successfully"
    fi
popd

# Cleanup
# Remove the FlashInfer source checkout; `--` ends option parsing for rm.
rm -rf -- flashinfer