# cpu_extension.cmake
include(FetchContent)

# C++ language level: C++17 is mandatory, with compiler-specific extensions
# left enabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

# Ask CMake to export the compile command database.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

8
9
10
11
12
# Record whether the target system is macOS. Later sections use MACOSX_FOUND
# to skip OpenMP flags and the /proc/cpuinfo probe.
# The variable is referenced by name (no ${...}) so if() dereferences it
# exactly once; the expanded form could be dereferenced a second time if a
# variable happened to be named after the system.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Define environment variables for special configurations
#
16
# Overrides taken from the environment at configure time:
#   VLLM_CPU_ARM_BF16 -> ENABLE_ARM_BF16 (forces ARM BF16 support on)
#   VLLM_CPU_X86      -> ENABLE_X86_ISA  (selects the x86 code path)
set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})
set(ENABLE_X86_ISA $ENV{VLLM_CPU_X86})
18
19
20

# NUMA support defaults to enabled; the Apple Silicon branch further down
# switches it off.
set (ENABLE_NUMA TRUE)

# Expose the csrc/ tree as an include root for targets defined after this.
include_directories("${CMAKE_SOURCE_DIR}/csrc")

23
24
25
#
# Check the compile flags
#
26
27
28
29
30
31
32
33
# Base flags shared by every CPU build. OpenMP is only requested when not
# building on macOS; the extension marker define is always added last.
if(NOT MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-fopenmp")
endif()
list(APPEND CXX_COMPILE_FLAGS "-DVLLM_CPU_EXTENSION")
34

35
36
37
38
39
40
41
# Outside macOS, read /proc/cpuinfo into CPUINFO so the ISA probes below can
# search it. Configuration stops if the file cannot be read.
if(NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        OUTPUT_VARIABLE CPUINFO
        RESULT_VARIABLE CPUINFO_RET)
    if(NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

44

45
46
47
48
49
50
51
52
53
# find_isa(<cpuinfo-text> <target> <out-var>)
#
# Sets <out-var> in the caller's scope to ON when <target> occurs as a
# substring of <cpuinfo-text>, and to OFF otherwise.
#
#   CPUINFO - text to search (typically the contents of /proc/cpuinfo)
#   TARGET  - substring to look for
#   OUT     - name of the result variable
function(find_isa CPUINFO TARGET OUT)
    # Both operands are quoted so that an empty string, or text containing
    # ';', is still passed to string(FIND) as exactly one argument each.
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(ISA_FOUND EQUAL -1)
        set(${OUT} OFF PARENT_SCOPE)
    else()
        set(${OUT} ON PARENT_SCOPE)
    endif()
endfunction()

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

# check_sysctl(<key> <out-var>)
#
# Runs `sysctl -n <key>` and sets <out-var> in the caller's scope to ON when
# the command exits with status 0 and prints "1" or a number greater than 0.
# Any other outcome yields OFF. Errors printed by sysctl are suppressed.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        OUTPUT_VARIABLE SYSCTL_INFO
        RESULT_VARIABLE SYSCTL_RET
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)

    # Start from "not available" and flip only on a positive answer.
    set(_sysctl_enabled OFF)
    if(SYSCTL_RET EQUAL 0)
        if(SYSCTL_INFO STREQUAL "1" OR SYSCTL_INFO GREATER 0)
            set(_sysctl_enabled ON)
        endif()
    endif()
    set(${OUT} ${_sysctl_enabled} PARENT_SCOPE)
endfunction()

69
#
# Detect the available ISA features.
#
# On Apple Silicon the features are queried through sysctl and NUMA is
# disabled; everywhere else they are derived from the CPUINFO text.
#
if(MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is passed quoted so that an empty value or one containing ';'
    # still arrives as a single argument.
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)

    # Check for RISC-V RVV support. A plain substring search for "v" matches
    # nearly any cpuinfo text (including the "rv64" prefix itself), so look
    # for the single-letter "v" extension inside the ISA string instead,
    # e.g. "isa : rv64imafdcv".
    string(REGEX MATCH "isa[ \t]*:[ \t]*rv[0-9]+[a-z]*v" _rvv_isa_match "${CPUINFO}")
    if(_rvv_isa_match)
        set(RVV_FOUND ON)
    else()
        set(RVV_FOUND OFF)
    endif()

    # Support cross-compilation by allowing override via environment variables
    if(ENABLE_ARM_BF16)
        set(ARM_BF16_FOUND ON)
        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
    endif()
endif()

#
# Select the compile flags for the detected ISA. Exactly one branch applies;
# configuration fails when no supported ISA is found.
#
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
    set(ENABLE_X86_ISA ON)
    if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
    endif()
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")
    # Both x86 variants start from the common flags, then add their own ISA
    # switches.
    list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX2 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX512
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq"
        "-mavx512bf16"
        "-mavx512vnni"
        "-mamx-bf16"
        "-mamx-tile")
    list(APPEND CXX_COMPILE_FLAGS_AVX2
        "-mavx2")
elseif(POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    if(POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    elseif(POWER10_FOUND OR POWER11_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()
elseif(ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(ARM_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    else()
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif(S390_FOUND)
    message(STATUS "S390 detected")
    # Check for S390 VXE support
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    if(RVV_FOUND)
        # FATAL_ERROR is the valid message() mode; the previous "FAIL_ERROR"
        # was printed as part of the text and did not stop configuration.
        message(FATAL_ERROR "Can't support rvv now.")
    else()
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

154

155
156
# Build oneDNN for GEMM kernels
if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
157
158
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
159
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
160
    if(ASIMD_FOUND)
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
        # and create a local shim dir with it
        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

        # Resolve libgomp from the shim directory only (no system search paths).
        find_library(OPEN_MP
            NAMES gomp
            PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
            NO_DEFAULT_PATH
            REQUIRED
        )
        # Set LD_LIBRARY_PATH to include the shim dir at build time to use the same libgomp as PyTorch
        if(OPEN_MP)
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
        endif()

        # Set number of parallel build processes; fall back to 4 when the
        # processor count cannot be determined.
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            set(NPROC 4)
        endif()

        # Fetch and populate ACL
183
184
185
186
187
188
189
190
        # Use the ACL checkout named by the ACL_ROOT_DIR environment variable
        # when it points at an existing directory; otherwise download the
        # sources and point ACL_ROOT_DIR (and ACL_LIB_DIR) at them.
        if(NOT (DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}"))
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.6.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        else()
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        endif()

199
200
201
202
203
204
205
206
207
208
209
210
211
212
        # Build ACL with CMake: configure into <ACL_ROOT_DIR>/build using
        # Ninja, then build with NPROC parallel jobs. Both steps run at
        # configure time and abort the configuration on failure.
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Check the configure step as well; otherwise a failed configure only
        # surfaces later as a confusing build error.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure failed (exit ${_acl_config_rc}).")
        endif()

        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")
229

230
231
        # VLLM/oneDNN settings for ACL: define VLLM_USE_ACL for the sources
        # and force the oneDNN ACL backend cache option to ON.
        add_compile_definitions(VLLM_USE_ACL)
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
    endif()

235
236
237
238
239
240
241
242
243
244
    # A local oneDNN checkout can be supplied through the
    # FETCHCONTENT_SOURCE_DIR_ONEDNN environment variable; otherwise the
    # sources are declared for download from GitHub.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(NOT FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Downloading oneDNN from GitHub")
        # aarch64 (excluding Apple Silicon) uses a pinned commit fetched with
        # a full clone; every other platform uses the v3.10 tag with a
        # shallow clone.
        if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
            message(STATUS "aarch64 detected: using pinned oneDNN commit 9c5be1cc59e368aebf0909e6cf20f981ea61462a")
            set(_onednn_git_tag 9c5be1cc59e368aebf0909e6cf20f981ea61462a)
            set(_onednn_git_shallow FALSE)
        else()
            set(_onednn_git_tag v3.10)
            set(_onednn_git_shallow TRUE)
        endif()
        FetchContent_Declare(
            oneDNN
            GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
            GIT_TAG        ${_onednn_git_tag}
            GIT_PROGRESS   TRUE
            GIT_SHALLOW    ${_onednn_git_shallow}
        )
    else()
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    endif()
264
265
266
267
268
269
270
271

    # Let the plain variables set below take precedence over option() calls
    # made inside the oneDNN subproject (policy CMP0077).
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # What to build: a static library limited to the inference workload and
    # the MATMUL/REORDER primitives, without docs, examples, tests or the
    # graph component.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
    set(ONEDNN_BUILD_GRAPH "OFF")
    set(ONEDNN_BUILD_DOC "OFF")
    set(ONEDNN_BUILD_EXAMPLES "OFF")
    set(ONEDNN_BUILD_TESTS "OFF")

    # Runtime controls and diagnostics.
    set(ONEDNN_ENABLE_JIT_PROFILING "ON")
    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
    set(ONEDNN_ENABLE_MAX_CPU_ISA "ON")
    set(ONEDNN_ENABLE_CPU_ISA_HINTS "ON")
    set(ONEDNN_VERBOSE "ON")

279
280
281
282
283
284
285
286
    # TODO: Refactor this
    # Flags used to compile the oneDNN helper: the generic CPU flags, except
    # on x86 where the AVX512 set is used instead.
    if(NOT ENABLE_X86_ISA)
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
    else()
        # Note: only enable oneDNN for AVX512
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
    endif()

287
288
    # Make oneDNN available with CMAKE_BUILD_TYPE temporarily switched to
    # Release, then restore the value that was in effect before.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})
291
292
293
294
295
296
    # Object library wrapping the oneDNN helper. It is appended to LIBS so the
    # extension targets link against it, and USE_ONEDNN records that the
    # oneDNN kernels are available.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
    target_include_directories(dnnl_ext
        PUBLIC
            ${oneDNN_SOURCE_DIR}/include
            ${oneDNN_BINARY_DIR}/include
        PRIVATE
            ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl torch)

    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
304
305
endif()

306
307
308
309
310
311
312
# TODO: Refactor this
# Report the flag set(s) in use: one generic list, or the AVX512 and AVX2
# lists when building for x86.
if(NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
else()
    message(STATUS "CPU extension (AVX512) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
    message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
endif()
313

314
315
316
317
318
319
# Link against libnuma when NUMA support is enabled; otherwise define
# VLLM_NUMA_DISABLED for the sources.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes the bare macro name. A leading "-D" is
    # only stripped by newer CMake releases and otherwise ends up inside the
    # definition itself.
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
320

321
322
323
324
325
326
327
328
329
330
331
332
333
#
# Generate CPU attention dispatch header
#
# Run the generator script with the Python interpreter at configure time,
# using csrc/cpu as the working directory. A non-zero exit status aborts the
# configuration.
message(STATUS "Generating CPU attention dispatch header")
execute_process(
    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/csrc/cpu/generate_cpu_attn_dispatch.py
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/cpu
    RESULT_VARIABLE GEN_RESULT
)
if(NOT GEN_RESULT EQUAL 0)
    message(FATAL_ERROR "Failed to generate CPU attention dispatch header")
endif()

334
335
336
337
338
#
# _C extension
#
# Baseline sources of the _C extension for non-x86 builds. Platform-specific
# files are prepended to this list further down.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    "csrc/cpu/cpu_attn.cpp"
    "csrc/cpu/torch_bindings.cpp")
346

347
348
349
350
351
352
# On aarch64 (excluding Apple Silicon) put the shared-memory source at the
# front of the list.
if(ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/shm.cpp")
endif()

353
# When oneDNN is in use, put its kernel source at the front of the list.
if(USE_ONEDNN)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/dnnl_kernels.cpp")
endif()
358

359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
#
# Define extension targets
#
# x86 builds produce two extensions from dedicated source lists: _C compiled
# with the AVX512 flags and _C_AVX2 compiled with the AVX2 flags. Every other
# platform produces a single _C extension from VLLM_EXT_SRC.
#
if(ENABLE_X86_ISA)
    set(VLLM_EXT_SRC_AVX512
        "csrc/cpu/sgl-kernels/gemm.cpp"
        "csrc/cpu/sgl-kernels/gemm_int8.cpp"
        "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
        "csrc/cpu/sgl-kernels/moe.cpp"
        "csrc/cpu/sgl-kernels/moe_int8.cpp"
        "csrc/cpu/sgl-kernels/moe_fp8.cpp"
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp"
        "csrc/cpu/cpu_fused_moe.cpp"
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/dnnl_kernels.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    set(VLLM_EXT_SRC_AVX2
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp")

    message(STATUS "CPU extension (AVX512) source files: ${VLLM_EXT_SRC_AVX512}")
    message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")

    # AVX512 variant.
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
        USE_SABI 3
        WITH_SOABI
    )
    # Capability macros for the SGL kernels (AVX512) and the AMX kernels
    # (AMXBF16).
    target_compile_definitions(_C PRIVATE
        "-DCPU_CAPABILITY_AVX512"
        "-DCPU_CAPABILITY_AMXBF16")

    # AVX2 variant.
    define_extension_target(
        _C_AVX2
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX2}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
        USE_SABI 3
        WITH_SOABI
    )
else()
    message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
        USE_SABI 3
        WITH_SOABI
    )
endif()
437

438
# Final status line emitted once the extension targets have been defined.
message(STATUS "Enabling C extension.")