# cpu_extension.cmake
include(FetchContent)

# Project-wide C++ defaults: require C++17 and keep compiler extensions
# enabled (CMAKE_CXX_EXTENSIONS ON selects the gnu++ dialect rather than the
# strict c++ one).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

# Ask the generator to write compile_commands.json for external tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

8
9
10
11
12
# Flag macOS targets. MACOSX_FOUND is consulted further down to skip the
# OpenMP/libgomp setup and the /proc/cpuinfo probing.
#
# The variable name is passed bare so that if() dereferences it itself; an
# explicit ${...} expansion leaves if() without a left operand when the
# variable is empty and allows the expanded text to be dereferenced again.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Define environment variables for special configurations
#
# VLLM_CPU_X86      - a true value selects the x86 code path even when
#                     CMAKE_SYSTEM_PROCESSOR does not match x86_64/amd64.
# VLLM_CPU_ARM_BF16 - a true value forces ARM_BF16_FOUND on in the
#                     non-Apple-Silicon probing branch (cross-compilation).
set(ENABLE_X86_ISA $ENV{VLLM_CPU_X86})
set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})

# Directory-scoped include path: every target defined after this point can
# include headers relative to <source>/csrc.
include_directories("${CMAKE_SOURCE_DIR}/csrc")

# NUMA is on by default; the Apple Silicon branch below switches it off.
set (ENABLE_NUMA TRUE)

23
24
25
#
# Check the compile flags
#
if(MACOSX_FOUND)
    # macOS: only the extension marker define, no -fopenmp and no libgomp shim.
    list(APPEND CXX_COMPILE_FLAGS
        "-DVLLM_CPU_EXTENSION")
else()
    list(APPEND CXX_COMPILE_FLAGS
        "-fopenmp"
        "-DVLLM_CPU_EXTENSION")

    # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
    # and create a local shim dir with it
    vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

    # Search the shim dir only (NO_DEFAULT_PATH) so a system libgomp is never
    # picked up; REQUIRED aborts configuration when nothing is found there.
    find_library(OPEN_MP
        NAMES gomp
        PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
        NO_DEFAULT_PATH
        REQUIRED
    )
    # Prepend the shim dir to LD_LIBRARY_PATH so the same libgomp as PyTorch is
    # used. NOTE: set(ENV{...}) only changes the environment of this CMake
    # configure run and of the processes it launches (e.g. execute_process);
    # it does not carry over to the later build step.
    if (OPEN_MP)
        set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
    endif()
endif()
49

50
51
52
53
54
55
56
# Capture the contents of /proc/cpuinfo in CPUINFO for the ISA probes that
# follow. Skipped on macOS; any failure to read the file aborts configuration.
if (NOT MACOSX_FOUND)
    execute_process(COMMAND cat /proc/cpuinfo
                    RESULT_VARIABLE CPUINFO_RET
                    OUTPUT_VARIABLE CPUINFO)
    if (NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

59

60
61
62
63
64
65
66
67
68
# find_isa(<cpuinfo-text> <needle> <out-var>)
#
# Sets <out-var> in the caller's scope to ON when <needle> occurs anywhere in
# <cpuinfo-text> (plain, case-sensitive substring search via string(FIND)),
# and to OFF otherwise.
function (find_isa CPUINFO TARGET OUT)
    # Both operands are quoted so each reaches string(FIND) as exactly one
    # argument: unquoted, a ';' in the text splits it into several arguments
    # and an empty text disappears entirely, breaking the call.
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(NOT ISA_FOUND EQUAL -1)
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

# check_sysctl(<key> <out-var>)
#
# Runs `sysctl -n <key>` and sets <out-var> in the caller's scope to ON when
# the command exits with 0 and prints "1" or a number greater than zero;
# otherwise <out-var> is set to OFF. stderr of the command is discarded.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        OUTPUT_VARIABLE _sysctl_value
        RESULT_VARIABLE _sysctl_rc
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)

    # Start from "not available" and flip only on a successful, positive reply.
    set(_feature_state OFF)
    if(_sysctl_rc EQUAL 0)
        if(_sysctl_value STREQUAL "1" OR _sysctl_value GREATER 0)
            set(_feature_state ON)
        endif()
    endif()

    set(${OUT} ${_feature_state} PARENT_SCOPE)
endfunction()

84
# Probe the CPU features that drive the architecture selection below.
# Apple Silicon is queried through sysctl; everything else is matched against
# the /proc/cpuinfo text captured in CPUINFO.
if (MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is quoted at every call so the text is handed over as a single
    # argument; unquoted, a ';' in it or an empty value shifts the needle and
    # output-variable arguments.
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)
    find_isa("${CPUINFO}" "zvfhmin" RVV_FP16_FOUND) # Check for RISC-V Vector FP16 support
    find_isa("${CPUINFO}" "zvfbfmin" RVV_BF16_FOUND) # Check for RISC-V Vector BF16 support

    # Support cross-compilation by allowing override via environment variables
    if (ENABLE_ARM_BF16)
        set(ARM_BF16_FOUND ON)
        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
    endif()
endif()

107
108
109
110
111
112
113
114
115
116
# Select the architecture-specific compile flags. The branches are tried in
# order: x86 (by processor name or the VLLM_CPU_X86 override), PowerPC,
# AArch64 (ASIMD), S390 and RISC-V; anything else aborts configuration.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
    set(ENABLE_X86_ISA ON)
    if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
    endif()
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")
    # Three flag sets are derived from the common flags: AVX512, AVX512 plus
    # AMX/BF16/VNNI, and AVX2.
    list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX2 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX512
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")
    list(APPEND CXX_COMPILE_FLAGS_AVX512_AMX
        ${CXX_COMPILE_FLAGS_AVX512}
        "-mamx-bf16"
        "-mamx-tile"
        "-mavx512bf16"
        "-mavx512vnni")
    list(APPEND CXX_COMPILE_FLAGS_AVX2
        "-mavx2")
elseif (POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    # POWER9 takes precedence; POWER10 and POWER11 both use the power10 flags.
    if (POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    elseif (POWER10_FOUND OR POWER11_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()

elseif (ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(ARM_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    else()
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif (S390_FOUND)
    message(STATUS "S390 detected")
    # Enable the S390 vector flags and tune for the build machine
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    message(STATUS "RISC-V detected")
    # VLLM_RVV_VLEN selects the target VLEN. Auto-detected from /proc/cpuinfo
    # by default; override with -DVLLM_RVV_VLEN=128 or -DVLLM_RVV_VLEN=256.
    if(NOT DEFINED VLLM_RVV_VLEN)
        # Auto-detect: find the largest zvl<N>b in /proc/cpuinfo isa line.
        if(EXISTS /proc/cpuinfo)
            file(READ /proc/cpuinfo _cpuinfo)
            set(_best 0)
            # Ascending order, so the last match is the largest one.
            foreach(_n IN ITEMS 128 256 512 1024)
                if(_cpuinfo MATCHES "zvl${_n}b")
                    set(_best ${_n})
                endif()
            endforeach()
            if(_best GREATER 0)
                set(VLLM_RVV_VLEN ${_best})
            endif()
        endif()
        # If auto-detect failed (no /proc/cpuinfo or no zvl<N>b reported)
        # but the cpuinfo probes reported zvfhmin/zvfbfmin, require explicit
        # specification.
        if(NOT DEFINED VLLM_RVV_VLEN AND (RVV_FP16_FOUND OR RVV_BF16_FOUND))
            message(FATAL_ERROR
                "RISC-V RVV is available but VLEN could not be auto-detected. "
                "Please specify VLEN explicitly:\n"
                "  -DVLLM_RVV_VLEN=128   (for VLEN=128 hardware)\n"
                "  -DVLLM_RVV_VLEN=256   (for VLEN=256 hardware, e.g. Spacemit X100)\n"
                "  -DVLLM_RVV_VLEN=0     (force scalar, no RVV)")
        endif()
    endif()
    if(VLLM_RVV_VLEN AND VLLM_RVV_VLEN GREATER 0)
        message(STATUS "RISC-V RVV VLEN=${VLLM_RVV_VLEN}")
        if(RVV_BF16_FOUND)
            message(STATUS "BF16 extension detected")
            set(MARCH_FLAGS -march=rv64gcv_zvfh_zfbfmin_zvfbfmin_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
            add_compile_definitions(RISCV_BF16_SUPPORT)
        elseif(RVV_FP16_FOUND)
            message(WARNING "BF16 functionality is not available")
            set(MARCH_FLAGS -march=rv64gcv_zvfh_zvl${VLLM_RVV_VLEN}b -mrvv-vector-bits=zvl -mabi=lp64d)
        else()
            message(STATUS "compile riscv with scalar (no FP16/BF16)")
            set(MARCH_FLAGS -march=rv64gc)
        endif()
    else()
        # VLEN unset or 0: scalar build.
        message(STATUS "compile riscv with scalar")
        set(MARCH_FLAGS -march=rv64gc)
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

213

214
215
# Build oneDNN for GEMM kernels
#
# Enabled for x86, non-Apple AArch64 and PowerPC. On success this defines the
# dnnl_ext object library, appends it to LIBS and sets USE_ONEDNN to ON;
# otherwise USE_ONEDNN is OFF.
if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
    if(ASIMD_FOUND)
        # Set number of parallel build processes
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            set(NPROC 4)
        endif()

        # Fetch and populate ACL
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.6.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            # Publish the fetched checkout through ACL_ROOT_DIR so the build
            # steps below run inside it.
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        endif()

        # Build ACL with CMake
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Check the configure step on its own so a failure is reported where
        # it happens instead of surfacing later as a confusing build error.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure failed (exit ${_acl_config_rc}).")
        endif()

        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")

        # VLLM/oneDNN settings for ACL
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
        add_compile_definitions(VLLM_USE_ACL)
    endif()

    # A local oneDNN checkout can be supplied through the
    # FETCHCONTENT_SOURCE_DIR_ONEDNN environment variable.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    else()
        message(STATUS "Downloading oneDNN from GitHub")
        if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
            # AArch64 uses a pinned commit (full clone) instead of the v3.10 tag.
            message(STATUS "aarch64 detected: using pinned oneDNN commit 9c5be1cc59e368aebf0909e6cf20f981ea61462a")
            FetchContent_Declare(
                oneDNN
                GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
                GIT_TAG        9c5be1cc59e368aebf0909e6cf20f981ea61462a
                GIT_PROGRESS   TRUE
                GIT_SHALLOW    FALSE
            )
        else()
            FetchContent_Declare(
                oneDNN
                GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
                GIT_TAG        v3.10
                GIT_PROGRESS   TRUE
                GIT_SHALLOW    TRUE
            )
        endif()
    endif()

    # oneDNN build options: static library, inference-only, MATMUL and REORDER
    # primitives, no docs/examples/tests/graph.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_BUILD_DOC "OFF")
    set(ONEDNN_BUILD_EXAMPLES "OFF")
    set(ONEDNN_BUILD_TESTS "OFF")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
    set(ONEDNN_BUILD_GRAPH "OFF")
    set(ONEDNN_ENABLE_JIT_PROFILING "ON")
    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
    set(ONEDNN_ENABLE_MAX_CPU_ISA "ON")
    set(ONEDNN_ENABLE_CPU_ISA_HINTS "ON")
    set(ONEDNN_VERBOSE "ON")
    # CMP0077 NEW: option() in the fetched project honours the normal
    # variables set above.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # TODO: Refactor this
    if (ENABLE_X86_ISA)
        # Note: only enable oneDNN for AVX512
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
    else()
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
    endif()

    # Temporarily switch to Release while oneDNN is added, then restore.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})

    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_include_directories(
        dnnl_ext
        PUBLIC ${oneDNN_SOURCE_DIR}/include
        PUBLIC ${oneDNN_BINARY_DIR}/include
        PRIVATE ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl torch)
    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
endif()

351
352
# TODO: Refactor this
# Report the compile flags in effect: one line per x86 flag set, or the single
# common flag list on every other architecture.
if (ENABLE_X86_ISA)
    message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) compile flags: ${CXX_COMPILE_FLAGS_AVX512_AMX}")
    message(STATUS "CPU extension (AVX512F) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
    message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
else()
    message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
endif()
359

360
361
362
363
364
365
# Link against libnuma when NUMA is enabled; otherwise define
# VLLM_NUMA_DISABLED for every target defined after this point.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes bare NAME[=value] items. A leading -D is
    # only stripped by CMake 3.26 and newer, so it is omitted here.
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
366

367
368
369
370
371
372
373
374
375
376
377
378
379
#
# Generate CPU attention dispatch header
#
# Runs csrc/cpu/generate_cpu_attn_dispatch.py with the configured Python
# interpreter during configuration, using csrc/cpu as the working directory.
# A non-zero exit status aborts the configure step.
message(STATUS "Generating CPU attention dispatch header")
execute_process(
    COMMAND
        ${Python_EXECUTABLE}
        ${CMAKE_SOURCE_DIR}/csrc/cpu/generate_cpu_attn_dispatch.py
    RESULT_VARIABLE GEN_RESULT
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/cpu
)
if(NOT (GEN_RESULT EQUAL 0))
    message(FATAL_ERROR "Failed to generate CPU attention dispatch header")
endif()

380
381
382
383
384
#
# _C extension
#
# Baseline source list; architecture- and oneDNN-specific sources are
# prepended to it further down.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/spec_decode_utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    "csrc/cpu/cpu_attn.cpp"
    "csrc/cpu/torch_bindings.cpp")
393

394
395
396
# Non-Apple AArch64 builds additionally compile the shm and BF16 activation
# LUT sources; they are placed in front of the existing list.
if (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
    set(VLLM_EXT_SRC
        "csrc/cpu/shm.cpp"
        "csrc/cpu/activation_lut_bf16.cpp"
        ${VLLM_EXT_SRC})
endif()

401
# With oneDNN enabled, the oneDNN kernel source is placed in front of the
# existing list.
if(USE_ONEDNN)
    set(VLLM_EXT_SRC
        "csrc/cpu/dnnl_kernels.cpp"
        ${VLLM_EXT_SRC})
endif()
406

407
# Define the extension targets.
# x86 builds three modules from dedicated source lists and flag sets:
#   _C        - AVX512 + AMX/BF16/VNNI (also compiles the sgl-kernels)
#   _C_AVX512 - AVX512
#   _C_AVX2   - AVX2
# Every other architecture builds a single _C module from VLLM_EXT_SRC.
if (ENABLE_X86_ISA)
    set(VLLM_EXT_SRC_SGL
        "csrc/cpu/sgl-kernels/gemm.cpp"
        "csrc/cpu/sgl-kernels/gemm_int8.cpp"
        "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
        "csrc/cpu/sgl-kernels/gemm_int4.cpp"
        "csrc/cpu/sgl-kernels/moe.cpp"
        "csrc/cpu/sgl-kernels/moe_int8.cpp"
        "csrc/cpu/sgl-kernels/moe_fp8.cpp")

    set(VLLM_EXT_SRC_AVX512
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp"
        "csrc/cpu/cpu_fused_moe.cpp"
        "csrc/cpu/utils.cpp"
        "csrc/cpu/spec_decode_utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/dnnl_kernels.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    set(VLLM_EXT_SRC_AVX2
        "csrc/cpu/utils.cpp"
        "csrc/cpu/spec_decode_utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) source files: ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}")
    message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}")
    message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")

    # Only the AVX512 variants link dnnl_ext; the AVX2 module links numa only.
    set(_C_LIBS numa dnnl_ext)
    set(_C_AVX512_LIBS numa dnnl_ext)
    set(_C_AVX2_LIBS numa)

    # AMX + AVX512F + AVX512BF16 + AVX512VNNI
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}
        LIBRARIES ${_C_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512_AMX}
        USE_SABI 3
        WITH_SOABI
    )

    # For AMX kernels (the definition is given as a bare name; no -D prefix
    # is needed with target_compile_definitions)
    target_compile_definitions(_C PRIVATE CPU_CAPABILITY_AMXBF16)

    # AVX512F
    define_extension_target(
        _C_AVX512
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512}
        LIBRARIES ${_C_AVX512_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
        USE_SABI 3
        WITH_SOABI
    )

    # AVX2
    define_extension_target(
        _C_AVX2
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX2}
        LIBRARIES ${_C_AVX2_LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
        USE_SABI 3
        WITH_SOABI
    )
else()
    message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
    #
    # Define extension targets
    #
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
        USE_SABI 3
        WITH_SOABI
    )
endif()
507

508
message(STATUS "Enabling C extension.")