cpu_extension.cmake 15.4 KB
Newer Older
1
2
3
include(FetchContent)

set(CMAKE_CXX_STANDARD_REQUIRED ON)
4
set(CMAKE_CXX_STANDARD 17)
5
set(CMAKE_CXX_EXTENSIONS ON)
6
7
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

8
9
10
11
12
# Record whether the target system is macOS. The variable name is passed to
# if() directly (no ${} expansion) so that the value is not dereferenced a
# second time and an empty CMAKE_SYSTEM_NAME cannot produce a malformed if().
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Read environment variables that enable special configurations
#
16
set(ENABLE_X86_ISA $ENV{VLLM_CPU_X86})
17
set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})
18
19
20

include_directories("${CMAKE_SOURCE_DIR}/csrc")

21
22
set (ENABLE_NUMA TRUE)

23
24
25
#
# Check the compile flags
#
26
27
28
29
30
31
32
# Baseline compile flags shared by all CPU extension targets.
# macOS builds only get the VLLM_CPU_EXTENSION define; every other platform
# additionally enables OpenMP and links against the libgomp found via the
# shim directory prepared by vllm_prepare_torch_gomp_shim().
if(MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS
        "-DVLLM_CPU_EXTENSION")
else()
    list(APPEND CXX_COMPILE_FLAGS
        "-fopenmp"
        "-DVLLM_CPU_EXTENSION")

    # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
    # and create a local shim dir with it
    vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

    find_library(OPEN_MP
        NAMES gomp
        PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
        NO_DEFAULT_PATH
        REQUIRED
    )
    # Prepend the shim dir to LD_LIBRARY_PATH so the same libgomp as PyTorch is used.
    # NOTE(review): set(ENV{...}) only affects this CMake process and the
    # processes it spawns - confirm that is sufficient for the intended use.
    if(OPEN_MP)
        if("$ENV{LD_LIBRARY_PATH}" STREQUAL "")
            # Avoid a trailing ':' - an empty LD_LIBRARY_PATH element makes the
            # dynamic loader search the current working directory.
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}")
        else()
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
        endif()
    endif()
endif()
49

50
51
52
53
54
55
56
# On non-macOS hosts, capture the text of /proc/cpuinfo into CPUINFO so the
# ISA probes below can search it. Configuration aborts if it cannot be read.
if(NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        OUTPUT_VARIABLE CPUINFO
        RESULT_VARIABLE CPUINFO_RET
    )
    if(NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

59

60
61
62
63
64
65
66
67
68
# find_isa(<cpuinfo> <target> <out>)
#
# Sets <out> in the caller's scope to ON when the substring <target> occurs
# anywhere in the text <cpuinfo>, and to OFF otherwise.
#
#   cpuinfo - text to search (e.g. the contents of /proc/cpuinfo); may be empty
#   target  - substring to look for
#   out     - name of the result variable
function(find_isa CPUINFO TARGET OUT)
    # Quote both operands so empty text or text containing ';' is still passed
    # to string(FIND) as exactly one argument each.
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(ISA_FOUND EQUAL -1)
        set(${OUT} OFF PARENT_SCOPE)
    else()
        set(${OUT} ON PARENT_SCOPE)
    endif()
endfunction()

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

# check_sysctl(<target> <out>)
#
# Runs `sysctl -n <target>` and sets <out> in the caller's scope to ON when
# the command exits with status 0 and prints "1" or a value greater than 0;
# otherwise <out> is set to OFF. Error output of sysctl is suppressed.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        OUTPUT_VARIABLE _sysctl_value
        RESULT_VARIABLE _sysctl_rc
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET
    )

    # Start from "not available" and flip only on a successful, positive reading.
    set(_sysctl_enabled OFF)
    if(_sysctl_rc EQUAL 0)
        if(_sysctl_value STREQUAL "1" OR _sysctl_value GREATER 0)
            set(_sysctl_enabled ON)
        endif()
    endif()

    set(${OUT} ${_sysctl_enabled} PARENT_SCOPE)
endfunction()

84
# Detect the available instruction-set features.
# Apple Silicon is probed through sysctl (and NUMA is switched off there);
# every other host is probed by searching the CPUINFO text for known markers.
if(MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is quoted so that it always arrives as exactly one argument,
    # even when it is empty (it is only populated off macOS) or contains ';'.
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)
    find_isa("${CPUINFO}" "zvfhmin" RVV_FP16_FOUND) # Check for RISC-V Vector FP16 support
    find_isa("${CPUINFO}" "zvfbfmin" RVV_BF16_FOUND) # Check for RISC-V Vector BF16 support

    # Support cross-compilation by allowing override via environment variables
    if(ENABLE_ARM_BF16)
        set(ARM_BF16_FOUND ON)
        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
    endif()
endif()

107
108
109
110
111
112
113
114
115
116
# Choose architecture-specific compile flags.
# Exactly one branch runs: x86 (by processor name or explicit request),
# PowerPC, ARM (ASIMD), S390, or RISC-V; anything else is a fatal error.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
    set(ENABLE_X86_ISA ON)
    if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR
       CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.3)
        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
    endif()

    list(APPEND CXX_COMPILE_FLAGS "-mf16c")

    # Each x86 variant starts from the common flags and adds its own ISA switches.
    list(APPEND CXX_COMPILE_FLAGS_AVX2
        ${CXX_COMPILE_FLAGS}
        "-mavx2")
    list(APPEND CXX_COMPILE_FLAGS_AVX512
        ${CXX_COMPILE_FLAGS}
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")
    # The AMX variant builds on top of the AVX512 flag set.
    list(APPEND CXX_COMPILE_FLAGS_AVX512_AMX
        ${CXX_COMPILE_FLAGS_AVX512}
        "-mamx-bf16"
        "-mamx-tile"
        "-mavx512bf16"
        "-mavx512vnni")
elseif(POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    # POWER9 gets its own tuning; POWER10 and POWER11 both use power10.
    if(POWER9_FOUND)
        set(_vllm_power_cpu "power9")
    else()
        set(_vllm_power_cpu "power10")
    endif()
    list(APPEND CXX_COMPILE_FLAGS
        "-mvsx"
        "-mcpu=${_vllm_power_cpu}"
        "-mtune=${_vllm_power_cpu}")
elseif(ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(NOT ARM_BF16_FOUND)
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    else()
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif(S390_FOUND)
    message(STATUS "S390 detected")
    # Vector flags for S390, tuned for the build host
    list(APPEND CXX_COMPILE_FLAGS "-mvx" "-mzvector" "-march=native" "-mtune=native")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    message(STATUS "RISC-V detected")
    if(RVV_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS
            -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl128b
            -mrvv-vector-bits=zvl
            -mabi=lp64d)
        add_compile_definitions(RISCV_BF16_SUPPORT)
    elseif(RVV_FP16_FOUND)
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS
            -march=rv64gcv_zvfh_zvl128b
            -mrvv-vector-bits=zvl
            -mabi=lp64d)
    else()
        # No vector extension detected: fall back to the scalar baseline.
        message(STATUS "compile riscv with scalar")
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

180

181
182
# Build oneDNN for GEMM kernels
if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
183
184
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
185
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
186
    # On ASIMD hosts, obtain the Arm Compute Library (ACL) sources - either from
    # the directory named by the ACL_ROOT_DIR environment variable or by
    # downloading them - build them at configure time, and switch oneDNN over to
    # the ACL backend.
    if(ASIMD_FOUND)
        # Set number of parallel build processes
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            # ProcessorCount reports 0 when it cannot determine the core count.
            set(NPROC 4)
        endif()

        # Fetch and populate ACL
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.6.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        endif()

        # Build ACL with CMake
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Configure step: stop right away on failure instead of letting the
        # build step fail later with a less specific error.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure failed (exit ${_acl_config_rc}).")
        endif()

        # Build step
        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")

        # VLLM/oneDNN settings for ACL
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
        add_compile_definitions(VLLM_USE_ACL)
    endif()

247
248
249
250
251
252
253
254
255
256
    # Declare where oneDNN comes from: a local checkout when
    # FETCHCONTENT_SOURCE_DIR_ONEDNN is set (seeded from the environment variable
    # of the same name), otherwise a Git download.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(NOT FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Downloading oneDNN from GitHub")

        # Non-Apple ASIMD builds pin an exact commit (full clone);
        # all other builds use the v3.10 tag with a shallow clone.
        if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
            message(STATUS "aarch64 detected: using pinned oneDNN commit 9c5be1cc59e368aebf0909e6cf20f981ea61462a")
            set(_onednn_git_tag 9c5be1cc59e368aebf0909e6cf20f981ea61462a)
            set(_onednn_git_shallow FALSE)
        else()
            set(_onednn_git_tag v3.10)
            set(_onednn_git_shallow TRUE)
        endif()

        FetchContent_Declare(
            oneDNN
            GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
            GIT_TAG        ${_onednn_git_tag}
            GIT_PROGRESS   TRUE
            GIT_SHALLOW    ${_onednn_git_shallow}
        )
    else()
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    endif()
276
277
278
279
280
281
282
283

    # Let option() calls in the fetched project honour the plain variables below.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # oneDNN build shape: static library, inference-only, MATMUL and REORDER primitives.
    set(ONEDNN_LIBRARY_TYPE STATIC)
    set(ONEDNN_ENABLE_WORKLOAD INFERENCE)
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")

    # Components that are switched off.
    set(ONEDNN_BUILD_DOC OFF)
    set(ONEDNN_BUILD_EXAMPLES OFF)
    set(ONEDNN_BUILD_TESTS OFF)
    set(ONEDNN_BUILD_GRAPH OFF)
    set(ONEDNN_ENABLE_ITT_TASKS OFF)

    # Features that are switched on.
    set(ONEDNN_ENABLE_JIT_PROFILING ON)
    set(ONEDNN_ENABLE_MAX_CPU_ISA ON)
    set(ONEDNN_ENABLE_CPU_ISA_HINTS ON)
    set(ONEDNN_VERBOSE ON)

291
292
293
294
295
296
297
298
    # TODO: Refactor this
    # Flags for the oneDNN helper: the generic flag set everywhere except x86,
    # where oneDNN is only enabled for AVX512 and so uses the AVX512 flag set.
    if(NOT ENABLE_X86_ISA)
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
    else()
        # Note: only enable oneDNN for AVX512
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
    endif()

299
300
    # Make oneDNN available with CMAKE_BUILD_TYPE temporarily forced to Release,
    # then restore the build type that was in effect before.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})

    # Object library wrapping the oneDNN helper; it is appended to LIBS so the
    # extension target picks it up.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_include_directories(
        dnnl_ext
        PUBLIC ${oneDNN_SOURCE_DIR}/include
        PUBLIC ${oneDNN_BINARY_DIR}/include
        PRIVATE ${oneDNN_SOURCE_DIR}/src
    )
    # PUBLIC spells out the transitive linking that the keyword-less signature
    # provided implicitly, so consumers of dnnl_ext still link dnnl and torch.
    target_link_libraries(dnnl_ext PUBLIC dnnl torch)
    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
316
317
endif()

318
319
# TODO: Refactor this
# Report the compile flags that will be used: one line for the generic flag
# set, or one line per x86 variant (AMX, AVX512, AVX2).
if(NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
else()
    message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) compile flags: ${CXX_COMPILE_FLAGS_AVX512_AMX}")
    message(STATUS "CPU extension (AVX512F) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
    message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
endif()
326

327
328
329
330
331
332
# Link against libnuma when NUMA support is enabled; otherwise define
# VLLM_NUMA_DISABLED for all targets in this directory.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes bare names; a leading -D is only stripped
    # by CMake 3.26+, and older versions would emit "-D-DVLLM_NUMA_DISABLED".
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
333

334
335
336
337
338
339
340
341
342
343
344
345
346
#
# Generate CPU attention dispatch header
#
# Run the dispatch-header generator script at configure time from within
# csrc/cpu, and abort configuration if it exits with a non-zero status.
message(STATUS "Generating CPU attention dispatch header")
execute_process(
    COMMAND
        ${Python_EXECUTABLE}
        ${CMAKE_SOURCE_DIR}/csrc/cpu/generate_cpu_attn_dispatch.py
    RESULT_VARIABLE GEN_RESULT
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/cpu
)
if(GEN_RESULT EQUAL 0)
    # Generator succeeded; nothing further to do here.
else()
    message(FATAL_ERROR "Failed to generate CPU attention dispatch header")
endif()

347
348
349
350
351
#
# _C extension
#
# Sources compiled into the generic (non-x86) extension target.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/spec_decode_utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    "csrc/cpu/cpu_attn.cpp"
    "csrc/cpu/torch_bindings.cpp")

# Non-Apple ASIMD builds additionally compile the shared-memory and
# BF16 activation lookup-table sources (placed at the front of the list).
if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
    list(PREPEND VLLM_EXT_SRC
        "csrc/cpu/shm.cpp"
        "csrc/cpu/activation_lut_bf16.cpp")
endif()

# The oneDNN kernels go first whenever oneDNN is in use.
if(USE_ONEDNN)
    list(PREPEND VLLM_EXT_SRC
        "csrc/cpu/dnnl_kernels.cpp")
endif()
373

374
# Define the extension target(s).
# Non-x86 builds produce a single _C module from VLLM_EXT_SRC. x86 builds
# produce three modules from dedicated source lists, each with its own flag
# set: _C (AMX + AVX512), _C_AVX512 and _C_AVX2.
if(NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")

    # Single generic extension module
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
        USE_SABI 3
        WITH_SOABI
    )
else()
    # Extra kernels that are only built into the AMX variant
    set(VLLM_EXT_SRC_SGL
        "csrc/cpu/sgl-kernels/gemm.cpp"
        "csrc/cpu/sgl-kernels/gemm_int8.cpp"
        "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
        "csrc/cpu/sgl-kernels/gemm_int4.cpp"
        "csrc/cpu/sgl-kernels/moe.cpp"
        "csrc/cpu/sgl-kernels/moe_int8.cpp"
        "csrc/cpu/sgl-kernels/moe_fp8.cpp")

    # Sources shared by the AMX and AVX512 variants
    set(VLLM_EXT_SRC_AVX512
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp"
        "csrc/cpu/cpu_fused_moe.cpp"
        "csrc/cpu/utils.cpp"
        "csrc/cpu/spec_decode_utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/dnnl_kernels.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    # Sources for the AVX2 variant
    set(VLLM_EXT_SRC_AVX2
        "csrc/cpu/utils.cpp"
        "csrc/cpu/spec_decode_utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) source files: ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}")
    message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}")
    message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")

    # Link libraries per variant; the AVX2 variant does not link dnnl_ext.
    set(_C_LIBS numa dnnl_ext)
    set(_C_AVX512_LIBS numa dnnl_ext)
    set(_C_AVX2_LIBS numa)

    # AMX + AVX512F + AVX512BF16 + AVX512VNNI
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}
        LIBRARIES ${_C_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512_AMX}
        USE_SABI 3
        WITH_SOABI
    )

    # For AMX kernels
    target_compile_definitions(_C PRIVATE "-DCPU_CAPABILITY_AMXBF16")

    # AVX512F
    define_extension_target(
        _C_AVX512
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512}
        LIBRARIES ${_C_AVX512_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
        USE_SABI 3
        WITH_SOABI
    )

    # AVX2
    define_extension_target(
        _C_AVX2
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX2}
        LIBRARIES ${_C_AVX2_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
        USE_SABI 3
        WITH_SOABI
    )
endif()
474

475
message(STATUS "Enabling C extension.")