import subprocess
from typing import List, Set

from packaging.version import parse, Version
import setuptools
import torch
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME

# Compiler flags.
CXX_FLAGS = ["-g", "-O2"]
# TODO(woosuk): Should we use -O3?
NVCC_FLAGS = ["-O2"]


if CUDA_HOME is None:
    raise RuntimeError(
        "Cannot find CUDA_HOME. "
        "CUDA must be available in order to build the package.")


def get_nvcc_cuda_version(cuda_dir: str) -> Version:
    """Get the CUDA version from nvcc.

    Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py
    """
    nvcc_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"],
                                          universal_newlines=True)
    output = nvcc_output.split()
    release_idx = output.index("release") + 1
    nvcc_cuda_version = parse(output[release_idx].split(",")[0])
    return nvcc_cuda_version


# Collect the compute capabilities of all available GPUs.
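# Each capability is encoded as major * 10 + minor (e.g., 8.6 -> 86).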
device_count = torch.cuda.device_count()
compute_capabilities: Set[int] = set()
for i in range(device_count):
    major, minor = torch.cuda.get_device_capability(i)
    if major < 7:
        raise RuntimeError(
            "GPUs with compute capability less than 7.0 are not supported.")
    compute_capabilities.add(major * 10 + minor)
# If no GPU is available, add all supported compute capabilities.
if not compute_capabilities:
    compute_capabilities = {70, 75, 80, 86, 90}
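    # 70 = Volta (V100), 75 = Turing (T4), 80/86 = Ampere (A100, RTX 30xx), 90 = Hopper (H100).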
# Add target compute capabilities to NVCC flags.
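# e.g. capability 86 expands to "-gencode arch=compute_86,code=sm_86", so nvcc
# emits native code for that architecture.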
for capability in compute_capabilities:
    NVCC_FLAGS += ["-gencode", f"arch=compute_{capability},code=sm_{capability}"]

# Validate the NVCC CUDA version.
nvcc_cuda_version = get_nvcc_cuda_version(CUDA_HOME)
if nvcc_cuda_version < Version("11.0"):
    raise RuntimeError("CUDA 11.0 or higher is required to build the package.")
if 86 in compute_capabilities and nvcc_cuda_version < Version("11.1"):
    raise RuntimeError(
        "CUDA 11.1 or higher is required for GPUs with compute capability 8.6.")
if 90 in compute_capabilities and nvcc_cuda_version < Version("11.8"):
    raise RuntimeError(
        "CUDA 11.8 or higher is required for GPUs with compute capability 9.0.")

ext_modules = []
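# Each CUDAExtension below pairs a C++ source in csrc/ with its CUDA kernel file
# and is compiled with the CXX_FLAGS/NVCC_FLAGS assembled above.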

# Cache operations.
cache_extension = CUDAExtension(
    name="cacheflow.cache_ops",
    sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
)
ext_modules.append(cache_extension)

# Attention kernels.
attention_extension = CUDAExtension(
    name="cacheflow.attention_ops",
    sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
)
ext_modules.append(attention_extension)

# Positional encoding kernels.
positional_encoding_extension = CUDAExtension(
    name="cacheflow.pos_encoding_ops",
    sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
)
ext_modules.append(positional_encoding_extension)

# Layer normalization kernels.
layernorm_extension = CUDAExtension(
    name="cacheflow.layernorm_ops",
    sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
)
ext_modules.append(layernorm_extension)

# Activation kernels.
activation_extension = CUDAExtension(
    name="cacheflow.activation_ops",
    sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
)
ext_modules.append(activation_extension)


def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt."""
    with open("requirements.txt") as f:
        requirements = f.read().strip().split("\n")
    return requirements


setuptools.setup(
    name="cacheflow",
    python_requires=">=3.8",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
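    # BuildExtension routes .cu sources to nvcc and .cpp sources to the host C++
    # compiler, applying the per-compiler extra_compile_args given above.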
    cmdclass={"build_ext": BuildExtension},
)