# Build script for the sgl-kernel project.
# Requires CMake 3.18 or newer and enables the C++ and CUDA languages.
cmake_minimum_required(VERSION 3.18)
project(sgl-kernel
    LANGUAGES
        CXX
        CUDA
)

# Basic settings
# Write compile_commands.json into the build tree for editors and tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Build both C++ and CUDA sources as standard 17, and make that a hard
# requirement rather than a preference.
foreach(lang IN ITEMS CXX CUDA)
    set(CMAKE_${lang}_STANDARD 17)
    set(CMAKE_${lang}_STANDARD_REQUIRED ON)
endforeach()

# Locations of the third-party CUTLASS and CUB source trees.
# The paths are anchored to this directory so they are absolute and remain
# valid wherever the variables are expanded, instead of relying on each
# consuming command to resolve a relative path.
set(CUTLASS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/cutlass")
set(CUB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/cub")

# Set CUDA architectures
# The list is exposed as a cache entry so it can be changed at configure time
# without editing this file, e.g.
#   cmake -DSGL_KERNEL_CUDA_ARCHITECTURES="80;90" ...
# The default is the list that used to be hard-coded here.
set(SGL_KERNEL_CUDA_ARCHITECTURES "75;80;86;89;90" CACHE STRING
    "Semicolon-separated list of CUDA architectures to build sgl-kernel for")

# CMAKE_CUDA_ARCHITECTURES initializes the CUDA_ARCHITECTURES property of
# targets created after this point, so it must be set before add_library().
set(CMAKE_CUDA_ARCHITECTURES "${SGL_KERNEL_CUDA_ARCHITECTURES}")
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

# Locate Python 3: the interpreter (provides Python3_EXECUTABLE) and the
# development files (provide the Python3::Python imported target).
find_package(Python3 REQUIRED
    COMPONENTS
        Interpreter
        Development
)

# Find PyTorch
# Ask the Python interpreter found above where the installed torch package
# keeps its CMake package files, so that find_package(Torch) can locate them.
execute_process(
    COMMAND "${Python3_EXECUTABLE}" -c "import torch; print(torch.utils.cmake_prefix_path)"
    RESULT_VARIABLE TORCH_CMAKE_PATH_RESULT
    OUTPUT_VARIABLE TORCH_CMAKE_PATH
    OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Only extend the search path when the query succeeded and printed a path.
# Appending an empty entry would be useless and would hide the real cause of a
# later "Torch not found" error.
if(TORCH_CMAKE_PATH_RESULT EQUAL 0 AND NOT "${TORCH_CMAKE_PATH}" STREQUAL "")
    list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PATH}")
else()
    # Not fatal here: Torch may still be discoverable through a
    # CMAKE_PREFIX_PATH or Torch_DIR supplied by the user.
    message(WARNING
        "Could not query torch.utils.cmake_prefix_path using "
        "${Python3_EXECUTABLE} (result: ${TORCH_CMAKE_PATH_RESULT}). "
        "Is PyTorch installed for this interpreter?")
endif()

find_package(Torch REQUIRED)

# _kernels: shared library built from the CUDA sources under
# src/sgl-kernel/csrc.
add_library(_kernels SHARED)

# Sources are attached separately from the target declaration; the order
# matches the original listing.
target_sources(_kernels
    PRIVATE
        src/sgl-kernel/csrc/trt_reduce_internal.cu
        src/sgl-kernel/csrc/trt_reduce_kernel.cu
        src/sgl-kernel/csrc/moe_align_kernel.cu
        src/sgl-kernel/csrc/int8_gemm_kernel.cu
        src/sgl-kernel/csrc/sampling_scaling_penalties.cu
        src/sgl-kernel/csrc/sgl_kernel_ops.cu
)

# Header search paths for compiling _kernels. All are PRIVATE, so they are not
# propagated to targets that link against _kernels.

# Project sources, followed by the CUDA and Torch include directories.
target_include_directories(_kernels PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/sgl-kernel/csrc
    ${CUDA_INCLUDE_DIRS}
    ${TORCH_INCLUDE_DIRS}
)

# Third-party header trees, located through CUTLASS_DIR and CUB_DIR.
# This call appends to the entries above, so the overall order is unchanged.
target_include_directories(_kernels PRIVATE
    ${CUTLASS_DIR}/include
    ${CUTLASS_DIR}/tools/util/include
    ${CUB_DIR}/cub
)

# Link _kernels against the Torch libraries and the Python library.
# PRIVATE keeps both out of the link interface of _kernels.
target_link_libraries(_kernels PRIVATE ${TORCH_LIBRARIES} Python3::Python)

# Build properties for the _kernels target.
set_target_properties(_kernels PROPERTIES
    # Output file is named exactly "_kernels.so" (no "lib" prefix).
    PREFIX ""
    SUFFIX ".so"
    POSITION_INDEPENDENT_CODE ON
    # Compile CUDA sources as relocatable device code and resolve the device
    # symbols when this target is linked.
    CUDA_SEPARABLE_COMPILATION ON
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
)