cpu_extension.cmake 11.3 KB
Newer Older
1
2
3
include(FetchContent)

# Compile the CPU extension as C++17 with compiler extensions enabled, and do
# not allow a silent fallback to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# MACOSX_FOUND is only ever set to TRUE; on other hosts it stays undefined.
# The variable name (not its expansion) is handed to if() so the value is
# dereferenced exactly once.
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Read opt-in switches for special configurations from the environment
#

# NUMA support defaults to enabled.
set(ENABLE_NUMA TRUE)

# Each variable ends up unset when its environment variable is empty or absent.
set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI})
set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16})

include_directories("${CMAKE_SOURCE_DIR}/csrc")

24
25
26
#
# Check the compile flags
#

# x86_64 builds always get the F16C flag.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")
endif()
33

34
35
36
37
38
39
40
41
# -fopenmp is added on every host except macOS. The extension marker define is
# appended unconditionally and therefore follows -fopenmp when both are present.
if(NOT MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-fopenmp")
endif()
list(APPEND CXX_COMPILE_FLAGS "-DVLLM_CPU_EXTENSION")
42

43
44
45
46
47
48
49
# On non-macOS hosts, capture the output of `cat /proc/cpuinfo` in CPUINFO.
# Configuration aborts when the command does not exit with status 0.
if (NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        OUTPUT_VARIABLE CPUINFO
        RESULT_VARIABLE CPUINFO_RET
    )
    if (NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

52

53
54
55
56
57
58
59
60
61
# Report whether a token occurs in a CPU description string.
#
# Arguments:
#   CPUINFO - text to search
#   TARGET  - substring to look for (case-sensitive)
#   OUT     - name of the variable set to ON or OFF in the caller's scope
#
# This is a plain substring search: a short TARGET also matches inside longer
# words.
function (find_isa CPUINFO TARGET OUT)
    # Both operands are quoted so that an empty value, or one containing ';',
    # still reaches string(FIND) as exactly one argument.
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(ISA_FOUND EQUAL -1)
        set(${OUT} OFF PARENT_SCOPE)
    else()
        set(${OUT} ON PARENT_SCOPE)
    endif()
endfunction()

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

# Query a sysctl key and report whether it reads as enabled.
#
# Arguments:
#   TARGET - key passed to `sysctl -n`
#   OUT    - name of the variable set to ON or OFF in the caller's scope
#
# OUT becomes ON only when the command exits with status 0 and its
# whitespace-stripped output is "1" or compares greater than 0. Every other
# outcome, including a failed command, yields OFF. stderr is suppressed.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        OUTPUT_VARIABLE sysctl_value
        RESULT_VARIABLE sysctl_status
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET
    )

    set(feature_state OFF)
    if(sysctl_status EQUAL 0)
        if(sysctl_value STREQUAL "1" OR sysctl_value GREATER 0)
            set(feature_state ON)
        endif()
    endif()
    set(${OUT} ${feature_state} PARENT_SCOPE)
endfunction()


78
79
80
81
82
83
84
85
86
87
88
# Report whether AVX512 has been switched off through the environment.
#
# Arguments:
#   OUT - name of the variable set to ON or OFF in the caller's scope
#
# Only the exact lowercase value "true" in VLLM_CPU_DISABLE_AVX512 counts as a
# request to disable; anything else (including an unset variable) gives OFF.
function (is_avx512_disabled OUT)
    set(avx512_off_request $ENV{VLLM_CPU_DISABLE_AVX512})

    set(disabled_state OFF)
    if(avx512_off_request)
        if(avx512_off_request STREQUAL "true")
            set(disabled_state ON)
        endif()
    endif()
    set(${OUT} ${disabled_state} PARENT_SCOPE)
endfunction()

is_avx512_disabled(AVX512_DISABLED)

# Probe the host for the ISA features the rest of this file keys on.
# Apple Silicon is queried through sysctl; every other host is probed by
# searching the captured CPUINFO text.
if (MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is passed quoted so it always arrives as exactly one argument,
    # even when it is empty or contains ';'.
    find_isa("${CPUINFO}" "avx2" AVX2_FOUND)
    find_isa("${CPUINFO}" "avx512f" AVX512_FOUND)
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)
    # NOTE(review): find_isa is a substring search, so "v" matches almost any
    # cpuinfo text, not just RVV-capable CPUs - TODO confirm intended probe.
    find_isa("${CPUINFO}" "v" RVV_FOUND) # Check for RISC-V RVV support
endif()

107
# Pick the ISA-specific compile flags. The first matching branch wins:
# AVX512 (unless disabled), AVX2, PowerPC, ARM ASIMD, S390, RISC-V. If none
# matches, configuration fails.
if (AVX512_FOUND AND NOT AVX512_DISABLED)
    list(APPEND CXX_COMPILE_FLAGS
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")

    # AVX512-BF16: enabled when the CPU reports it or the user forces it via
    # the environment, but only with GCC >= 12.3.
    find_isa("${CPUINFO}" "avx512_bf16" AVX512BF16_FOUND)
    if (AVX512BF16_FOUND OR ENABLE_AVX512BF16)
        if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
            list(APPEND CXX_COMPILE_FLAGS "-mavx512bf16")
            set(ENABLE_AVX512BF16 ON)
        else()
            set(ENABLE_AVX512BF16 OFF)
            message(WARNING "Disable AVX512-BF16 ISA support, requires gcc/g++ >= 12.3")
        endif()
    else()
        set(ENABLE_AVX512BF16 OFF)
        message(WARNING "Disable AVX512-BF16 ISA support, no avx512_bf16 found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512BF16=1.")
    endif()

    # AVX512-VNNI: same gating as BF16 above.
    find_isa("${CPUINFO}" "avx512_vnni" AVX512VNNI_FOUND)
    if (AVX512VNNI_FOUND OR ENABLE_AVX512VNNI)
        if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
            list(APPEND CXX_COMPILE_FLAGS "-mavx512vnni")
            set(ENABLE_AVX512VNNI ON)
        else()
            set(ENABLE_AVX512VNNI OFF)
            message(WARNING "Disable AVX512-VNNI ISA support, requires gcc/g++ >= 12.3")
        endif()
    else()
        set(ENABLE_AVX512VNNI OFF)
        message(WARNING "Disable AVX512-VNNI ISA support, no avx512_vnni found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512VNNI=1.")
    endif()

elseif (AVX2_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-mavx2")
    message(WARNING "vLLM CPU backend using AVX2 ISA")

elseif (POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    # POWER9 takes precedence when several POWER probes matched; POWER11 is
    # built with the power10 settings.
    if (POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    elseif (POWER10_FOUND OR POWER11_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()

elseif (ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(ARM_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    else()
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif (S390_FOUND)
    message(STATUS "S390 detected")
    # Enable the S390 vector facility flags and tune for the build host.
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    if(RVV_FOUND)
        # Deliberately non-fatal. NOTE(review): RVV_FOUND comes from a
        # substring probe for "v", which matches far more than RVV-capable
        # CPUs - confirm the intended behaviour before turning this into a
        # FATAL_ERROR.
        message(WARNING "Can't support rvv now.")
    else()
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

191

192
# Build oneDNN for GEMM kernels. This covers x86 with AVX512 enabled, AArch64
# hosts other than Apple Silicon, and POWER9/10/11. Everywhere else
# USE_ONEDNN is set to OFF.
if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
    if(ASIMD_FOUND)
        # A user-provided ACL checkout (ACL_ROOT_DIR) wins over downloading.
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.2.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            # Export the location so the steps below read it from
            # ACL_ROOT_DIR in both cases.
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
        endif()

        # Build ACL with scons
        include(ProcessorCount)
        ProcessorCount(_NPROC)
        set(_scons_cmd
            scons -j${_NPROC}
            Werror=0 debug=0 neon=1 examples=0 embed_kernels=0 os=linux
            arch=armv8.2-a build=native benchmark_examples=0 fixed_format_kernels=1
            multi_isa=1 openmp=1 cppthreads=0
        )

        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
        # and create a local shim dir with it
        include("${CMAKE_CURRENT_LIST_DIR}/utils.cmake")
        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

        if(NOT VLLM_TORCH_GOMP_SHIM_DIR STREQUAL "")
            list(APPEND _scons_cmd extra_link_flags=-L${VLLM_TORCH_GOMP_SHIM_DIR})
        endif()

        # The ACL build runs at configure time, inside the ACL source tree.
        execute_process(
            COMMAND ${_scons_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL SCons build failed (exit ${_acl_rc}).")
        endif()

        set(ONEDNN_AARCH64_USE_ACL "ON")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ENV{ACL_ROOT_DIR}/build/")
        add_compile_definitions(VLLM_USE_ACL)
    endif()

    # A local oneDNN checkout can be supplied through the
    # FETCHCONTENT_SOURCE_DIR_ONEDNN environment variable (or cache entry).
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        # Quoted so a path containing spaces or ';' stays one argument.
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR "${FETCHCONTENT_SOURCE_DIR_ONEDNN}"
        )
    else()
        message(STATUS "Downloading oneDNN from GitHub")
        FetchContent_Declare(
            oneDNN
            GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
            GIT_TAG v3.9
            GIT_PROGRESS TRUE
            GIT_SHALLOW TRUE
        )
    endif()

    # oneDNN build options, set as normal variables before the sub-project is
    # added.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_BUILD_DOC "OFF")
    set(ONEDNN_BUILD_EXAMPLES "OFF")
    set(ONEDNN_BUILD_TESTS "OFF")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
    set(ONEDNN_BUILD_GRAPH "OFF")
    set(ONEDNN_ENABLE_JIT_PROFILING "OFF")
    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
    set(ONEDNN_ENABLE_MAX_CPU_ISA "OFF")
    set(ONEDNN_ENABLE_CPU_ISA_HINTS "OFF")
    set(ONEDNN_VERBOSE "OFF")
    # With CMP0077 NEW, option() in the sub-project honours the normal
    # variables set above.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    FetchContent_MakeAvailable(oneDNN)

    # Object library wrapping the oneDNN helper; it is appended to LIBS.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_include_directories(
        dnnl_ext
        PUBLIC ${oneDNN_SOURCE_DIR}/include
        PUBLIC ${oneDNN_BINARY_DIR}/include
        PRIVATE ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl)
    target_compile_options(dnnl_ext PRIVATE ${CXX_COMPILE_FLAGS} -fPIC)
    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
endif()

294
295
message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")

# Link against libnuma when NUMA support is enabled; otherwise define
# VLLM_NUMA_DISABLED for the sources.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes bare names; a "-D" prefix is only
    # stripped by newer CMake releases.
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
302

303
304
305
306
307
308
309
#
# _C extension
#

# Sources compiled on every supported platform.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/attention.cpp"
    "csrc/cpu/cache.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/cpu/torch_bindings.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

# AVX512-only sources are placed in front of the common list.
if (AVX512_FOUND AND NOT AVX512_DISABLED)
    set(VLLM_EXT_SRC
        "csrc/cpu/shm.cpp"
        ${VLLM_EXT_SRC})
    # The sgl kernels are added only when both BF16 and VNNI are enabled.
    if (ENABLE_AVX512BF16 AND ENABLE_AVX512VNNI)
        set(VLLM_EXT_SRC
            "csrc/cpu/sgl-kernels/gemm.cpp"
            "csrc/cpu/sgl-kernels/gemm_int8.cpp"
            "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
            "csrc/cpu/sgl-kernels/moe.cpp"
            "csrc/cpu/sgl-kernels/moe_int8.cpp"
            "csrc/cpu/sgl-kernels/moe_fp8.cpp"
            ${VLLM_EXT_SRC})
        # add_compile_definitions() takes bare names; a "-D" prefix is only
        # stripped by newer CMake releases.
        add_compile_definitions(CPU_CAPABILITY_AVX512)
    endif()
endif()

if(USE_ONEDNN)
    set(VLLM_EXT_SRC
        "csrc/cpu/dnnl_kernels.cpp"
        ${VLLM_EXT_SRC})
endif()

message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")

342
343
344
345
#
# Define extension targets
#

# Register the `_C` target from the sources, libraries and compile flags
# collected above.
define_extension_target(
    _C
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_EXT_SRC}
    LIBRARIES ${LIBS}
    COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
    USE_SABI 3
    WITH_SOABI
)

message(STATUS "Enabling C extension.")