cpu_extension.cmake 14.6 KB
Newer Older
1
2
3
include(FetchContent)

# Compile everything as C++17 with compiler extensions enabled, and refuse to
# fall back to an older standard if the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

# Ask CMake to write compile_commands.json into the build directory.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

8
9
10
11
12
# Flag macOS hosts. MACOSX_FOUND is left undefined on every other system.
# The variable is named without ${} so that if() dereferences it exactly once;
# the expanded form would be looked up again as a variable name and would be a
# syntax error when CMAKE_SYSTEM_NAME is empty.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Read environment variables that select special configurations
#
16
# VLLM_CPU_X86: when set to a true value, the x86 code path is selected
# regardless of CMAKE_SYSTEM_PROCESSOR.
set(ENABLE_X86_ISA $ENV{VLLM_CPU_X86})
# VLLM_CPU_ARM_BF16: when set to a true value, ARM BF16 support is treated as
# present even if CPU detection does not report it.
set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})
18
19
20

# NUMA support is on by default; the Apple Silicon path below turns it off.
set(ENABLE_NUMA TRUE)

# Make headers under csrc/ visible to every target defined after this point.
include_directories("${CMAKE_SOURCE_DIR}/csrc")

23
24
25
#
# Check the compile flags
#
26
27
28
29
30
31
32
33
# Base flags shared by all CPU builds. OpenMP is requested everywhere except
# on macOS; -DVLLM_CPU_EXTENSION is always added (after -fopenmp when present).
if(NOT MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-fopenmp")
endif()
list(APPEND CXX_COMPILE_FLAGS "-DVLLM_CPU_EXTENSION")
34

35
36
37
38
39
40
41
# On non-macOS hosts, capture /proc/cpuinfo into CPUINFO so the ISA probes
# below can search it. Configuration aborts if the file cannot be read.
if(NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        RESULT_VARIABLE CPUINFO_RET
        OUTPUT_VARIABLE CPUINFO
    )
    if(NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

44

45
46
47
48
49
50
51
52
53
# find_isa(<cpuinfo-text> <needle> <out-var>)
#
# Sets <out-var> in the caller's scope to ON when <needle> occurs anywhere in
# <cpuinfo-text> (plain substring search), and to OFF otherwise.
#
# Both strings are quoted when handed to string(FIND) so that an empty
# <cpuinfo-text>, or text containing ';', is still passed as one argument
# instead of vanishing or being split into several.
function (find_isa CPUINFO TARGET OUT)
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(NOT ISA_FOUND EQUAL -1)
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

# check_sysctl(<key> <out-var>)
#
# Runs `sysctl -n <key>` and sets <out-var> in the caller's scope to ON when
# the command succeeds and prints "1" or a number greater than zero.
# Any other outcome (command failure, other output) yields OFF.
function(check_sysctl TARGET OUT)
    # Start from OFF and only promote to ON on a positive answer.
    set(${OUT} OFF PARENT_SCOPE)

    execute_process(
        COMMAND sysctl -n "${TARGET}"
        RESULT_VARIABLE _sysctl_status
        OUTPUT_VARIABLE _sysctl_value
        ERROR_QUIET
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )

    if(_sysctl_status EQUAL 0)
        if(_sysctl_value STREQUAL "1" OR _sysctl_value GREATER 0)
            set(${OUT} ON PARENT_SCOPE)
        endif()
    endif()
endfunction()

69
# Detect the instruction-set features available on the build host.
#
# Apple Silicon (macOS + arm64) is probed through sysctl and has NUMA disabled.
# Every other host is probed by searching the CPUINFO text for known markers.
# CPUINFO is passed quoted so that an empty/undefined value or text containing
# ';' reaches find_isa as exactly one argument instead of shifting the
# remaining arguments.
if (MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)
    find_isa("${CPUINFO}" "zvfhmin" RVV_FP16_FOUND) # Check for RISC-V Vector FP16 support
    find_isa("${CPUINFO}" "zvfbfmin" RVV_BF16_FOUND) # Check for RISC-V Vector BF16 support

    # Support cross-compilation by allowing override via environment variables
    if (ENABLE_ARM_BF16)
        set(ARM_BF16_FOUND ON)
        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
    endif()
endif()

92
93
94
95
96
97
98
99
100
101
# Pick architecture-specific compile flags. The first matching branch wins:
# x86 (by processor name or explicit override), PowerPC, ARM (ASIMD), S390,
# RISC-V. Configuration fails when none of them applies.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
    set(ENABLE_X86_ISA ON)
    if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
    endif()

    # F16C is common to both x86 variants.
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")

    # AVX512 variant: common flags followed by the AVX512/AMX feature flags.
    list(APPEND CXX_COMPILE_FLAGS_AVX512
        ${CXX_COMPILE_FLAGS}
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq"
        "-mavx512bf16"
        "-mavx512vnni"
        "-mamx-bf16"
        "-mamx-tile")

    # AVX2 variant: common flags followed by -mavx2.
    list(APPEND CXX_COMPILE_FLAGS_AVX2
        ${CXX_COMPILE_FLAGS}
        "-mavx2")
elseif(POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    if(POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    else()
        # Only POWER10 or POWER11 can reach this point; both use power10 flags.
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()
elseif(ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(NOT ARM_BF16_FOUND)
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    else()
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif(S390_FOUND)
    message(STATUS "S390 detected")
    # Check for S390 VXE support
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    message(STATUS "RISC-V detected")
    if(RVV_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS
            -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl128b
            -mrvv-vector-bits=zvl
            -mabi=lp64d)
        add_compile_definitions(RISCV_BF16_SUPPORT)
    elseif(RVV_FP16_FOUND)
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS
            -march=rv64gcv_zvfh_zvl128b
            -mrvv-vector-bits=zvl
            -mabi=lp64d)
    else()
        # No vector extension detected: MARCH_FLAGS is not set in this branch.
        message(STATUS "compile riscv with scalar")
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

163

164
165
# Build oneDNN for GEMM kernels
if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
166
167
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
168
    # ACL is off by default; it is switched on below once ACL has been built.
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
    if(ASIMD_FOUND)
        # Set number of parallel build processes (fall back to 4 when the
        # processor count cannot be determined).
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            set(NPROC 4)
        endif()
        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
        # and create a local shim dir with it
        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

        find_library(OPEN_MP
            NAMES gomp
            PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
            NO_DEFAULT_PATH
            REQUIRED
        )
        # Set LD_LIBRARY_PATH to include the shim dir at build time to use the same libgomp as PyTorch
        if (OPEN_MP)
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
        endif()

        # Fetch and populate ACL, unless ACL_ROOT_DIR already names a directory.
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.6.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        endif()

        # Build ACL with CMake (static library, OpenMP threading, Release).
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Configure step: stop right away on failure so the real cause is
        # reported instead of a confusing error from the build step.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure failed (exit ${_acl_config_rc}).")
        endif()

        # Build step.
        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")

        # VLLM/oneDNN settings for ACL
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
        add_compile_definitions(VLLM_USE_ACL)
    endif()

244
245
246
247
248
249
250
251
252
253
    # A local oneDNN checkout can be supplied through the
    # FETCHCONTENT_SOURCE_DIR_ONEDNN environment variable / cache entry.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(NOT FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Downloading oneDNN from GitHub")
        if(APPLE_SILICON_FOUND OR NOT ASIMD_FOUND)
            # Everything except non-Apple aarch64 uses the v3.10 tag.
            FetchContent_Declare(
                oneDNN
                GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
                GIT_TAG        v3.10
                GIT_PROGRESS   TRUE
                GIT_SHALLOW    TRUE
            )
        else()
            # Non-Apple aarch64 is pinned to a specific commit (full clone).
            message(STATUS "aarch64 detected: using pinned oneDNN commit 9c5be1cc59e368aebf0909e6cf20f981ea61462a")
            FetchContent_Declare(
                oneDNN
                GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
                GIT_TAG        9c5be1cc59e368aebf0909e6cf20f981ea61462a
                GIT_PROGRESS   TRUE
                GIT_SHALLOW    FALSE
            )
        endif()
    else()
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    endif()
273
274
275
276
277
278
279
280

    # oneDNN build configuration, set before the project is made available.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")

    # Features that are switched off.
    foreach(_onednn_opt IN ITEMS
            ONEDNN_BUILD_DOC
            ONEDNN_BUILD_EXAMPLES
            ONEDNN_BUILD_TESTS
            ONEDNN_BUILD_GRAPH
            ONEDNN_ENABLE_ITT_TASKS)
        set(${_onednn_opt} "OFF")
    endforeach()

    # Features that are switched on.
    foreach(_onednn_opt IN ITEMS
            ONEDNN_ENABLE_JIT_PROFILING
            ONEDNN_ENABLE_MAX_CPU_ISA
            ONEDNN_ENABLE_CPU_ISA_HINTS
            ONEDNN_VERBOSE)
        set(${_onednn_opt} "ON")
    endforeach()

    # Default policy CMP0077 to NEW for projects configured after this point,
    # so option() keeps the normal variables set above.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

288
289
290
291
292
293
294
295
    # TODO: Refactor this
    # Choose the flags used to compile the oneDNN helper sources.
    if(NOT ENABLE_X86_ISA)
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
    else()
        # Note: only enable oneDNN for AVX512
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
    endif()

    # Configure oneDNN as Release regardless of the outer build type, then
    # restore the saved value. The order of these four statements matters.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})
300
301
302
303
304
305
    # Object library wrapping the oneDNN helper; it is added to LIBS so the
    # extension targets defined later link against it.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
    target_include_directories(
        dnnl_ext
        PUBLIC
            ${oneDNN_SOURCE_DIR}/include
            ${oneDNN_BINARY_DIR}/include
        PRIVATE
            ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl torch)

    set(USE_ONEDNN ON)
    list(APPEND LIBS dnnl_ext)
else()
    set(USE_ONEDNN OFF)
313
314
endif()

315
316
317
318
319
320
321
# TODO: Refactor this
# Print the compile flags that will be used: a single set for non-x86 builds,
# or the AVX512 and AVX2 sets for x86 builds.
if(NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
else()
    message(STATUS "CPU extension (AVX512) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
    message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
endif()
322

323
324
325
326
327
328
# Link against libnuma when NUMA is enabled; otherwise define
# VLLM_NUMA_DISABLED for all sources.
# The definition is given as a bare name: add_compile_definitions() expects
# NAME or NAME=value, and older CMake releases do not strip a leading "-D"
# (the compiler would see -D-DVLLM_NUMA_DISABLED).
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
329

330
331
332
333
334
335
336
337
338
339
340
341
342
#
# Generate CPU attention dispatch header
#
# Run generate_cpu_attn_dispatch.py at configure time, with csrc/cpu as the
# working directory. Configuration aborts if the script exits non-zero.
message(STATUS "Generating CPU attention dispatch header")
execute_process(
    COMMAND
        ${Python_EXECUTABLE}
        ${CMAKE_SOURCE_DIR}/csrc/cpu/generate_cpu_attn_dispatch.py
    WORKING_DIRECTORY
        ${CMAKE_SOURCE_DIR}/csrc/cpu
    RESULT_VARIABLE
        GEN_RESULT
)
if(NOT GEN_RESULT EQUAL 0)
    message(FATAL_ERROR "Failed to generate CPU attention dispatch header")
endif()

343
344
345
346
347
#
# _C extension
#
# Base source list for the non-x86 _C extension; architecture-specific files
# are prepended further down.
set(VLLM_EXT_SRC
    csrc/cpu/activation.cpp
    csrc/cpu/utils.cpp
    csrc/cpu/layernorm.cpp
    csrc/cpu/mla_decode.cpp
    csrc/cpu/pos_encoding.cpp
    csrc/moe/dynamic_4bit_int_moe_cpu.cpp
    csrc/cpu/cpu_attn.cpp
    csrc/cpu/torch_bindings.cpp
)
355

356
357
358
359
360
361
# ARM hosts other than Apple Silicon also compile shm.cpp; it goes at the
# front of the source list.
if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/shm.cpp")
endif()

362
# When oneDNN is in use, dnnl_kernels.cpp is placed at the front of the
# source list.
if(USE_ONEDNN)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/dnnl_kernels.cpp")
endif()
367

368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
#
# Define extension targets
#
# Non-x86 builds produce a single _C module from VLLM_EXT_SRC.
# x86 builds produce two modules: _C (AVX512/AMX flags, full kernel set) and
# _C_AVX2 (AVX2 flags, reduced source list).
if(NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
        USE_SABI 3
        WITH_SOABI
    )
else()
    set(VLLM_EXT_SRC_AVX512
        "csrc/cpu/sgl-kernels/gemm.cpp"
        "csrc/cpu/sgl-kernels/gemm_int8.cpp"
        "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
        "csrc/cpu/sgl-kernels/moe.cpp"
        "csrc/cpu/sgl-kernels/moe_int8.cpp"
        "csrc/cpu/sgl-kernels/moe_fp8.cpp"
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp"
        "csrc/cpu/cpu_fused_moe.cpp"
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/dnnl_kernels.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    set(VLLM_EXT_SRC_AVX2
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    message(STATUS "CPU extension (AVX512) source files: ${VLLM_EXT_SRC_AVX512}")
    message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")

    # AVX512 module.
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
        USE_SABI 3
        WITH_SOABI
    )

    # CPU_CAPABILITY_AVX512 is for the SGL kernels, CPU_CAPABILITY_AMXBF16 for
    # the AMX kernels.
    target_compile_definitions(_C PRIVATE
        CPU_CAPABILITY_AVX512
        CPU_CAPABILITY_AMXBF16)

    # AVX2 module.
    define_extension_target(
        _C_AVX2
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX2}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
        USE_SABI 3
        WITH_SOABI
    )
endif()

message(STATUS "Enabling C extension.")