CMakeLists.txt 14.2 KB
Newer Older
James Wyatt's avatar
James Wyatt committed
1
2
3
4
# This CMake config hopefully makes it easier to compile.
# Ensure the CUDA Toolkit is available on your path. Then run:
#   For  GCC: `cmake -B build . && cmake --build build`
#   For MSVC: `cmake -B build . && cmake --build build --config Release`
5
6
# You can also use the following options and variables
#  - COMPUTE_BACKEND: Set to `cpu`, `cuda`, `hip`, `mps`, or `xpu` to select the backend
James Wyatt's avatar
James Wyatt committed
7
8
9
#  - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
#                  is whatever CMake finds on your path.
#  - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
Johnny's avatar
Johnny committed
10
#                        Separate by semicolons (quote the list in a shell), e.g. `-DCOMPUTE_CAPABILITY="89;90;100;120"`
James Wyatt's avatar
James Wyatt committed
11
12
#                        Check your compute capability here: https://developer.nvidia.com/cuda-gpus
#  - PTXAS_VERBOSE: Pass the `-v` option to the PTX Assembler
13
cmake_minimum_required(VERSION 3.22.1)

project(bitsandbytes LANGUAGES CXX)

# If run without specifying a build type, default to the Release configuration.
# Release optimizes the generated binaries for performance and also adds the
# `-DNDEBUG` flag, which turns off a bunch of asserts that seem to link to new
# symbols in libstdc++, worsening our manylinux compliance.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

25
26
27
# Define included source files, grouped by the backend that consumes them.
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
set(CUDA_FILES csrc/ops.cu csrc/kernels.cu)
set(HIP_FILES csrc/ops.hip csrc/kernels.hip)
set(MPS_FILES csrc/mps_ops.mm)
set(METAL_FILES csrc/mps_kernels.metal)
set(XPU_FILES csrc/xpu_ops.cpp csrc/xpu_kernels.cpp)

# NOTE(review): this overrides the C++ compiler after project() has already
# run, and it applies to every backend (including cpu) -- confirm it is intended.
set(CMAKE_CXX_COMPILER "nvcc")

# C++ sources are always included
list(APPEND SRC_FILES ${CPP_FILES})

36
37
# User-facing configuration knobs.
# COMPUTE_BACKEND selects which backend is built; the STRINGS property lists the
# accepted values for GUI front-ends such as cmake-gui / ccmake.
set(COMPUTE_BACKEND "cuda" CACHE STRING "The compute backend to use (cpu, cuda, hip, mps, xpu)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda hip mps xpu)
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

if(APPLE)
  set(CMAKE_OSX_DEPLOYMENT_TARGET 13.1)
endif()

# Base name of the produced library; backend-specific suffixes are appended later.
set(BNB_OUTPUT_NAME "bitsandbytes")

message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND})")
48
49
50
51
52
53

# Translate COMPUTE_BACKEND into the BUILD_<backend> switches used by the rest
# of this file. Every switch is reset first so that exactly one (or none, for
# the CPU-only build) ends up ON, regardless of what was defined beforehand.
set(BUILD_CUDA OFF)
set(BUILD_HIP OFF)
set(BUILD_MPS OFF)
set(BUILD_XPU OFF)

# The expansion is quoted so an empty COMPUTE_BACKEND does not break if().
if("${COMPUTE_BACKEND}" STREQUAL "cuda")
    if(APPLE)
        message(FATAL_ERROR "CUDA is not supported on macOS" )
    endif()
    set(BUILD_CUDA ON)
elseif("${COMPUTE_BACKEND}" STREQUAL "hip")
    if(APPLE)
        message(FATAL_ERROR "HIP is not supported on macOS" )
    endif()
    set(BUILD_HIP ON)
elseif("${COMPUTE_BACKEND}" STREQUAL "mps")
    if(NOT APPLE)
        message(FATAL_ERROR "MPS is only supported on macOS" )
    endif()
    set(BUILD_MPS ON)
elseif("${COMPUTE_BACKEND}" STREQUAL "xpu")
    if(APPLE)
        message(FATAL_ERROR "XPU is not supported on macOS" )
    endif()
    set(BUILD_XPU ON)
endif()
# Any other value (including "cpu") leaves all switches OFF: CPU-only build.


James Wyatt's avatar
James Wyatt committed
86
if(BUILD_CUDA)
87
88
89
90
91
    # NVCC normally will only work with MSVC up to 1939. VS2022 17.10+ starts using versions 1940+.
    # Workaround: use --allow-unsupported-compiler
    # This needs to be added *before* we try to enable the CUDA language so CMake's compiler check passes.
    if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940)
        string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")

        # This is needed to build with VS2022 17.11+ and CUDA < 12.4.
        if(MSVC_VERSION VERSION_GREATER_EQUAL 1941)
            string(APPEND CMAKE_CUDA_FLAGS " -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH")
        endif()
    endif()

    enable_language(CUDA) # This will fail if CUDA is not found
    find_package(CUDAToolkit REQUIRED)
James Wyatt's avatar
James Wyatt committed
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

    # Convert the CUDA version from X.Y.z to the XY shortcode (e.g. 12.4.1 -> 124).
    # The dot is escaped so that it only matches a literal "." separator rather
    # than any character.
    string(REGEX MATCH "^[0-9]+\\.[0-9]+" _CUDA_VERSION_FIRST_TWO "${CMAKE_CUDA_COMPILER_VERSION}")
    string(REPLACE "." "" CUDA_VERSION_SHORT "${_CUDA_VERSION_FIRST_TWO}")

    # Expose a cache variable that the user can set to ensure the correct version of CUDA is found
    set(CUDA_VERSION "${CUDA_VERSION_SHORT}" CACHE STRING "Expected CUDA Version Shortcode")

    message(STATUS "CUDA Version: ${CUDA_VERSION_SHORT} (${CMAKE_CUDA_COMPILER_VERSION})")
    message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")

    # It should match the discovered version
    if(NOT CUDA_VERSION STREQUAL "${CUDA_VERSION_SHORT}")
        message(FATAL_ERROR "You've specified CUDA version ${CUDA_VERSION} however the CUDA compiler found is ${CUDA_VERSION_SHORT}."
            " Ensure the desired CUDA compiler is the first one available on your PATH."
        )
    endif()

119
120
121
122
    # Reject toolkits outside the accepted range: 11.8 up to, but excluding, 14.0.
    if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.8")
        message(FATAL_ERROR "CUDA Version < 11.8 is not supported")
    elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0")
        message(FATAL_ERROR "CUDA Version > 13 is not supported")
    endif()

125
126
127
128
    # CMake < 3.23.0 does not define CMAKE_CUDA_ARCHITECTURES_ALL, so provide
    # the lists ourselves based on the detected CUDA compiler version.
    # Both branches use set() so the fallback lists never inherit stale content.
    if(CMAKE_VERSION VERSION_LESS "3.23.0")
        message(STATUS "CMake < 3.23.0; determining CUDA architectures supported...")

        if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
            # Starting in CUDA 13.0, Thor Blackwell is renamed to SM110.
            # Support for architectures older than Turing (SM75) is removed.
            set(CMAKE_CUDA_ARCHITECTURES_ALL 75 80 86 87 88 89 90 100 103 110 120 121)
            set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80 90 100 110 120)
        else()
            # 11.8-12.9 supports these at a minimum.
            set(CMAKE_CUDA_ARCHITECTURES_ALL 50 52 53 60 61 62 70 72 75 80 86 87 89 90)
            set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 50 60 70 80 90)

            # CUDA 12.8 adds support for Blackwell.
            if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.8")
                list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 100 101 120 121)
                list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 100 120)
            endif()

            # CUDA 12.9 adds SM103 (Blackwell B300).
            if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.9")
                list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 103)
            endif()
        endif()
    endif()

James Wyatt's avatar
James Wyatt committed
152
    string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")

    if(PTXAS_VERBOSE)
        # Verbose? Outputs register usage information, and other things...
        string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")
    endif()

    # Collect the numeric part of every known architecture into POSSIBLE_CAPABILITIES.
    foreach(capability ${CMAKE_CUDA_ARCHITECTURES_ALL})
        # Most of the items here are like: `xx-real`, so we just extract the `xx` portion
        string(REGEX MATCH "[0-9]+" capability_id "${capability}")
        if(capability_id GREATER 0)
            list(APPEND POSSIBLE_CAPABILITIES ${capability_id})
        endif()
    endforeach()

    # This can be changed via -D argument to CMake
    # By default all possible capabilities are compiled
    set(COMPUTE_CAPABILITY "${POSSIBLE_CAPABILITIES}" CACHE STRING "Compute Capabilities Targeted")

    message(STATUS "CUDA Capabilities Available: ${POSSIBLE_CAPABILITIES}")
    message(STATUS "CUDA Capabilities  Selected: ${COMPUTE_CAPABILITY}")

174
175
176
177
178
179
180
181
182
183
184
185
    # Fail early with a clear message: with no capability selected, the list
    # operations below would produce an empty CMAKE_CUDA_ARCHITECTURES.
    if("${COMPUTE_CAPABILITY}" STREQUAL "")
        message(FATAL_ERROR "COMPUTE_CAPABILITY is empty; select at least one CUDA compute capability.")
    endif()

    # Use the "real" option to build native cubin for all selections.
    # Ensure we build the PTX for the latest version.
    # This behavior of adding a PTX (virtual) target for the highest architecture
    # is similar to how the "all" and "all-major" options would behave in CMake >= 3.23.
    # TODO: Consider bumping CMake requirement and using CMAKE_CUDA_ARCHITECTURES=[all | native] by default
    list(REMOVE_DUPLICATES COMPUTE_CAPABILITY)
    list(SORT COMPUTE_CAPABILITY COMPARE NATURAL)
    # The highest capability is split off and re-added without the "-real" suffix.
    list(POP_BACK COMPUTE_CAPABILITY _LATEST_CAPABILITY)
    list(TRANSFORM COMPUTE_CAPABILITY APPEND "-real" OUTPUT_VARIABLE CMAKE_CUDA_ARCHITECTURES)
    list(APPEND CMAKE_CUDA_ARCHITECTURES ${_LATEST_CAPABILITY})

    message(STATUS "CUDA Targets: ${CMAKE_CUDA_ARCHITECTURES}")
James Wyatt's avatar
James Wyatt committed
186
187
188
189
190
    message(STATUS "CUDA NVCC Flags: ${CMAKE_CUDA_FLAGS}")

    # Add the CUDA sources, tag the library name with the CUDA shortcode, and
    # define BUILD_CUDA for the sources.
    list(APPEND SRC_FILES ${CUDA_FILES})

    string(APPEND BNB_OUTPUT_NAME "_cuda${CUDA_VERSION_SHORT}")
    add_compile_definitions(BUILD_CUDA)
192
elseif(BUILD_HIP)
193
194
    # NOTE(review): enable_language(HIP) is commented out here, so
    # CMAKE_HIP_COMPILER printed below may be empty -- confirm this is intended.
    #enable_language(HIP)

    # Default location of the amd_comgr package config; only applied when the
    # caller has not already provided a usable amd_comgr_DIR.
    if(NOT amd_comgr_DIR)
        set(amd_comgr_DIR "/opt/dtk/lib64/cmake/amd_comgr")
    endif()

    message(STATUS "HIP Compiler: ${CMAKE_HIP_COMPILER}")

    # Architecture selection precedence: BNB_ROCM_ARCH, then an existing
    # CMAKE_HIP_ARCHITECTURES, then AMDGPU_TARGETS, then the built-in default.
    if(DEFINED BNB_ROCM_ARCH)
      set(CMAKE_HIP_ARCHITECTURES ${BNB_ROCM_ARCH})
    else()
      if (NOT AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
        set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx942;gfx1100")
      elseif (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
        set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
      endif()
    endif()
    message(STATUS "HIP Targets: ${CMAKE_HIP_ARCHITECTURES}")

    list(APPEND SRC_FILES ${HIP_FILES})

    string(APPEND BNB_OUTPUT_NAME "_rocm")

    # get hip version
    execute_process(
        COMMAND hipconfig --version
        OUTPUT_VARIABLE HIP_CONFIG_VERSION
        RESULT_VARIABLE _hipconfig_result
    )
    if(NOT _hipconfig_result EQUAL 0)
        # Keep going (best effort), but make the missing version visible.
        message(WARNING "Running `hipconfig --version` failed (${_hipconfig_result}); the HIP version could not be determined.")
    endif()
    string(REGEX MATCH "[0-9]+\\.[0-9]+" HIP_VERSION "${HIP_CONFIG_VERSION}")
    string(REPLACE "." "" HIP_VERSION_SHORT "${HIP_VERSION}")

    string(APPEND BNB_OUTPUT_NAME "${HIP_VERSION_SHORT}")
    add_compile_definitions(__HIP_PLATFORM_AMD__)
    add_compile_definitions(__HIP_PLATFORM_HCC__)
    add_compile_definitions(BUILD_HIP)
220
221
222
elseif(BUILD_MPS)
    if(NOT APPLE)
        message(FATAL_ERROR "MPS is only supported on macOS" )
    endif()

    enable_language(OBJCXX)

    list(APPEND SRC_FILES ${MPS_FILES})

    string(APPEND BNB_OUTPUT_NAME "_mps")
    add_compile_definitions(BUILD_MPS)

    # Compile the Metal kernels into bitsandbytes/bitsandbytes.metallib.
    # The commands use paths relative to the source directory (csrc/..., build/...,
    # bitsandbytes/...), so they are run there explicitly, and the OUTPUT is given
    # as an absolute path. Without this, custom commands run in the build tree
    # and relative OUTPUTs are resolved against it, which breaks out-of-source
    # builds such as `cmake -B build .`.
    set(_bnb_metallib "${CMAKE_CURRENT_SOURCE_DIR}/bitsandbytes/bitsandbytes.metallib")
    file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build")
    add_custom_command(OUTPUT "${_bnb_metallib}"
                COMMAND xcrun metal -c -o "build/bitsandbytes.air" ${METAL_FILES}
                COMMAND xcrun metallib "build/bitsandbytes.air" -o "bitsandbytes/bitsandbytes.metallib"
                DEPENDS ${METAL_FILES}
                WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                COMMENT "Compiling Metal kernels"
                VERBATIM)
    add_custom_target(metallib DEPENDS "${_bnb_metallib}")
239
240
241
242
243
244
245
246
247
elseif(BUILD_XPU)
    # XPU backend: define BUILD_XPU, tag the library name, and add the XPU sources.
    add_compile_definitions(BUILD_XPU)
    string(APPEND BNB_OUTPUT_NAME "_xpu")
    list(APPEND SRC_FILES ${XPU_FILES})

    # Compiler selection: icx for C; for C++, icx on Windows and icpx elsewhere.
    set(CMAKE_C_COMPILER icx)
    if(WIN32)
        set(CMAKE_CXX_COMPILER icx)
    else()
        set(CMAKE_CXX_COMPILER icpx)
    endif()
248
else()
249
    # No accelerator backend selected: CPU-only library.
    string(APPEND BNB_OUTPUT_NAME "_cpu")
    # Clears GPU_SOURCES (not referenced elsewhere in this file).
    set(GPU_SOURCES)
endif()


if(WIN32)
    # Export all symbols
    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

if(MSVC)
    # Append the MSVC-specific options to the existing C++ flags.
    string(APPEND CMAKE_CXX_FLAGS " /arch:AVX2 /fp:fast")
endif()

# Build the shared library from the sources gathered for the selected backend.
# The common C++ files are explicitly compiled as CXX.
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
add_library(bitsandbytes SHARED ${SRC_FILES})
target_compile_features(bitsandbytes PUBLIC cxx_std_17)
target_include_directories(bitsandbytes PUBLIC csrc include)
James Wyatt's avatar
James Wyatt committed
267
268
269


# CUDA backend: add the toolkit include directories, link the CUDA libraries,
# and turn on separable compilation for the target.
if(BUILD_CUDA)
    target_include_directories(bitsandbytes PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    target_link_libraries(bitsandbytes PUBLIC CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cusparse)
    set_target_properties(bitsandbytes
        PROPERTIES
            CUDA_SEPARABLE_COMPILATION ON
    )
endif()
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# HIP backend: locate the ROCm libraries and wire them into the target.
if(BUILD_HIP)
    # Honor the ROCM_PATH environment variable; fall back to /opt/rocm.
    if(NOT DEFINED ENV{ROCM_PATH})
      set(ROCM_PATH /opt/rocm)
    else()
      set(ROCM_PATH $ENV{ROCM_PATH})
    endif()
    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})

    # Wrapper around find_package() that also prints the version it found.
    # Extra arguments (e.g. REQUIRED) are forwarded unchanged.
    macro(find_package_and_print_version PACKAGE_NAME)
      find_package("${PACKAGE_NAME}" ${ARGN})
      message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
    endmacro()
    find_package_and_print_version(hipblas REQUIRED)
    find_package_and_print_version(hiprand REQUIRED)
    find_package_and_print_version(hipsparse REQUIRED)

    ## hacky way of excluding hip::amdhip64 (with it linked many tests unexpectedly fail e.g. adam8bit because of inaccuracies)
    #set_target_properties(hip::host PROPERTIES INTERFACE_LINK_LIBRARIES "")
    #set_target_properties(hip-lang::host PROPERTIES INTERFACE_LINK_LIBRARIES "")
    set(CMAKE_HIP_IMPLICIT_LINK_LIBRARIES "")

    target_include_directories(bitsandbytes PRIVATE ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/include ${ROCM_PATH}/include /include)
    target_link_directories(bitsandbytes PRIVATE ${ROCM_PATH}/lib /lib)
    target_link_libraries(bitsandbytes PUBLIC roc::hipblas hip::hiprand roc::hipsparse)

    target_compile_definitions(bitsandbytes PUBLIC BNB_USE_HIP)
    set_source_files_properties(${HIP_FILES} PROPERTIES LANGUAGE HIP)
    set_target_properties(bitsandbytes PROPERTIES LINKER_LANGUAGE CXX)

    # hipBLASLt is only used from HIP 6.1 onwards; older versions define NO_HIPBLASLT.
    if(HIP_VERSION VERSION_LESS "6.1")
        target_compile_definitions(bitsandbytes PUBLIC NO_HIPBLASLT)
    else()
        # REQUIRED: roc::hipblaslt is linked unconditionally below, so a missing
        # package should fail here with a clear message.
        find_package(hipblaslt REQUIRED)
        target_link_libraries(bitsandbytes PUBLIC roc::hipblaslt)
    endif()
endif()
312
313
314
315
# MPS backend: build the Metal library before the target and link the Apple frameworks.
if(BUILD_MPS)
    add_dependencies(bitsandbytes metallib)
    # PUBLIC is spelled out to match the keyword signature used for the other
    # backends on this target.
    target_link_libraries(bitsandbytes PUBLIC objc "-framework Foundation" "-framework Metal" "-framework MetalPerformanceShaders" "-framework MetalPerformanceShadersGraph")
endif()
316
317
318
319
320
321
322
323
324
# XPU backend: apply the SYCL compile/link options and require C++20 on the target.
if(BUILD_XPU)
    # Both flag sets are semicolon-separated lists; each element is passed as
    # its own option when the variable is expanded below.
    set(SYCL_COMPILE_FLAGS "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
    set(SYCL_LINK_FLAGS "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required'")

    set_target_properties(bitsandbytes PROPERTIES CXX_STANDARD 20)
    target_link_options(bitsandbytes PRIVATE ${SYCL_LINK_FLAGS})
    target_compile_options(bitsandbytes PRIVATE ${SYCL_COMPILE_FLAGS})
endif()
James Wyatt's avatar
James Wyatt committed
325
326
327
328

# Output naming and placement for the built library.
if(WIN32)
    set_target_properties(bitsandbytes PROPERTIES PREFIX "lib")
endif()

set_target_properties(bitsandbytes PROPERTIES OUTPUT_NAME "${BNB_OUTPUT_NAME}")

if(MSVC)
    # Set the per-configuration output directories for both Release and Debug
    # so the binaries land in the bitsandbytes directory.
    set_target_properties(bitsandbytes PROPERTIES
        LIBRARY_OUTPUT_DIRECTORY_RELEASE "${PROJECT_SOURCE_DIR}/bitsandbytes"
        LIBRARY_OUTPUT_DIRECTORY_DEBUG "${PROJECT_SOURCE_DIR}/bitsandbytes"
        RUNTIME_OUTPUT_DIRECTORY_RELEASE "${PROJECT_SOURCE_DIR}/bitsandbytes"
        RUNTIME_OUTPUT_DIRECTORY_DEBUG "${PROJECT_SOURCE_DIR}/bitsandbytes"
    )
endif()

set_target_properties(bitsandbytes PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bitsandbytes")