CMakeLists.txt 5.19 KB
Newer Older
1
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Przemek Tredak's avatar
Przemek Tredak committed
2
3
#
# See LICENSE for license information.
4

5
cmake_minimum_required(VERSION 3.21)
6
7
8
9
10
11
12
13
14
15
16

if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 70 80 89 90)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

project(transformer_engine LANGUAGES CUDA CXX)

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
set(BUILD_THREADS_PER_JOB $ENV{NVTE_BUILD_THREADS_PER_JOB})
if (NOT BUILD_THREADS_PER_JOB)
  set(BUILD_THREADS_PER_JOB 1)
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${BUILD_THREADS_PER_JOB}")

if(DEFINED ENV{MAX_JOBS})
  set(JOBS $ENV{MAX_JOBS})
elseif(DEFINED ENV{NVTE_BUILD_MAX_JOBS})
  set(JOBS $ENV{NVTE_BUILD_MAX_JOBS})
else()
  set(JOBS "max number of")
endif()

message(STATUS "Parallel build with ${JOBS} jobs and ${BUILD_THREADS_PER_JOB} threads per job")

33
34
35
36
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
  set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -G")
endif()

37
find_package(CUDAToolkit REQUIRED)
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

# Check for cuDNN frontend API
set(CUDNN_FRONTEND_INCLUDE_DIR
    "${CMAKE_SOURCE_DIR}/../../3rdparty/cudnn-frontend/include")
if(NOT EXISTS "${CUDNN_FRONTEND_INCLUDE_DIR}")
    message(FATAL_ERROR
            "Could not find cuDNN frontend API. "
            "Try running 'git submodule update --init --recursive' "
            "within the Transformer Engine source.")
endif()
include(${CMAKE_SOURCE_DIR}/../../3rdparty/cudnn-frontend/cmake/cuDNN.cmake)

find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
include_directories(${PROJECT_SOURCE_DIR}/..)

53
# Configure Transformer Engine library
54
set(transformer_engine_SOURCES)
55
list(APPEND transformer_engine_SOURCES
56
     pycudnn.cpp
57
58
59
60
61
62
63
     transformer_engine.cpp
     transpose/cast_transpose.cu
     transpose/transpose.cu
     transpose/cast_transpose_fusion.cu
     transpose/transpose_fusion.cu
     transpose/multi_cast_transpose.cu
     activation/gelu.cu
64
65
     fused_attn/fused_attn_f16_max512_seqlen.cu
     fused_attn/fused_attn_f16_arbitrary_seqlen.cu
66
67
     activation/relu.cu
     activation/swiglu.cu
cyanguwa's avatar
cyanguwa committed
68
69
70
     fused_attn/fused_attn_fp8.cu
     fused_attn/fused_attn.cpp
     fused_attn/utils.cu
71
72
73
74
75
76
77
78
     gemm/cublaslt_gemm.cu
     layer_norm/ln_api.cpp
     layer_norm/ln_bwd_semi_cuda_kernel.cu
     layer_norm/ln_fwd_cuda_kernel.cu
     rmsnorm/rmsnorm_api.cpp
     rmsnorm/rmsnorm_bwd_semi_cuda_kernel.cu
     rmsnorm/rmsnorm_fwd_cuda_kernel.cu
     util/cast.cu
Tim Moon's avatar
Tim Moon committed
79
80
81
82
83
84
     util/cuda_driver.cpp
     util/cuda_runtime.cpp
     util/rtc.cpp
     util/system.cpp
     fused_softmax/scaled_masked_softmax.cu
     fused_softmax/scaled_upper_triang_masked_softmax.cu
85
     fused_softmax/scaled_aligned_causal_masked_softmax.cu
86
87
     fused_rope/fused_rope.cu
     recipe/delayed_scaling.cu)
88
add_library(transformer_engine SHARED ${transformer_engine_SOURCES})
89
90
91
target_include_directories(transformer_engine PUBLIC
                           "${CMAKE_CURRENT_SOURCE_DIR}/include")

92
93
target_compile_definitions(transformer_engine PUBLIC NV_CUDNN_FRONTEND_USE_DYNAMIC_LOADING)

94
95
96
# Configure dependencies
target_link_libraries(transformer_engine PUBLIC
                      CUDA::cublas
Tim Moon's avatar
Tim Moon committed
97
                      CUDA::cuda_driver
98
                      CUDA::cudart)
99
100
target_include_directories(transformer_engine PRIVATE
                           ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
101
target_include_directories(transformer_engine PRIVATE "${CUDNN_FRONTEND_INCLUDE_DIR}")
102

Tim Moon's avatar
Tim Moon committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Make header files with C++ strings
function(make_string_header STRING STRING_NAME)
    configure_file(util/string_header.h.in
                   "string_headers/${STRING_NAME}.h"
                   @ONLY)
endfunction()
function(make_string_header_from_file file_ STRING_NAME)
    file(READ "${file_}" STRING)
    configure_file(util/string_header.h.in
                   "string_headers/${STRING_NAME}.h"
                   @ONLY)
endfunction()
list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_include_path)
make_string_header("${cuda_include_path}"
                   string_path_cuda_include)
118
119
120

make_string_header_from_file(transpose/rtc/cast_transpose_fusion.cu
                             string_code_transpose_rtc_cast_transpose_fusion_cu)
121
122
make_string_header_from_file(transpose/rtc/cast_transpose.cu
                             string_code_transpose_rtc_cast_transpose_cu)
Tim Moon's avatar
Tim Moon committed
123
124
make_string_header_from_file(transpose/rtc/transpose.cu
                             string_code_transpose_rtc_transpose_cu)
125
126
make_string_header_from_file(utils.cuh
                             string_code_utils_cuh)
127
128
129
make_string_header_from_file(util/math.h
                             string_code_util_math_h)

Tim Moon's avatar
Tim Moon committed
130
131
132
target_include_directories(transformer_engine PRIVATE
                           "${CMAKE_CURRENT_BINARY_DIR}/string_headers")

133
# Compiler options
134
135
set_source_files_properties(fused_softmax/scaled_masked_softmax.cu
                            fused_softmax/scaled_upper_triang_masked_softmax.cu
136
                            fused_softmax/scaled_aligned_causal_masked_softmax.cu
137
138
139
140
                            PROPERTIES
                            COMPILE_OPTIONS "--use_fast_math")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
Tim Moon's avatar
Tim Moon committed
141
142
143

# Install library
install(TARGETS transformer_engine DESTINATION .)
144