cpu_extension.cmake 13.5 KB
Newer Older
1
2
3
include(FetchContent)

# Project-wide C++ defaults: C++17 is mandatory (no silent fallback to an
# older standard) and compiler-specific language extensions stay enabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

# Ask the generator to write a compile_commands.json for external tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

8
9
10
11
12
# Flag macOS builds. Later sections consult MACOSX_FOUND to leave out the
# OpenMP flag and to skip the /proc/cpuinfo probe.
#
# The variable is named without `${...}` so that if() dereferences it exactly
# once; the expanded form would be looked up again as a variable name if its
# value happened to match one, and would be a syntax error if it were empty.
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()


13
14
15
#
# Optional overrides taken from the environment at configure time
#
#   VLLM_CPU_X86      - a true value selects the x86 code path even when
#                       CMAKE_SYSTEM_PROCESSOR is not x86_64/amd64
#   VLLM_CPU_ARM_BF16 - a true value forces ARM BF16 on for non-Apple hosts
#
set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})
set(ENABLE_X86_ISA $ENV{VLLM_CPU_X86})

# Make <source root>/csrc an include root for every target defined below.
include_directories("${CMAKE_SOURCE_DIR}/csrc")

# NUMA support defaults to on; the Apple Silicon branch turns it off.
set (ENABLE_NUMA TRUE)

23
24
25
#
# Base compile flags shared by every CPU extension target
#
# OpenMP is requested everywhere except on macOS; the VLLM_CPU_EXTENSION
# define is always present and always follows the OpenMP flag.
if(NOT MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-fopenmp")
endif()
list(APPEND CXX_COMPILE_FLAGS "-DVLLM_CPU_EXTENSION")
34

35
36
37
38
39
40
41
# Capture the text of /proc/cpuinfo into CPUINFO for the ISA probes below.
# The probe is skipped on macOS; anywhere else a failing `cat` aborts the
# configure step.
if (NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        OUTPUT_VARIABLE CPUINFO
        RESULT_VARIABLE CPUINFO_RET)
    if (NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()

44

45
46
47
48
49
50
51
52
53
# find_isa(<CPUINFO> <TARGET> <OUT>)
#
# Sets the variable named by OUT, in the caller's scope, to ON when the text
# CPUINFO contains the substring TARGET (exact, case-sensitive match) and to
# OFF otherwise.
#
#   CPUINFO - text to search, e.g. the contents of /proc/cpuinfo
#   TARGET  - substring identifying the ISA feature
#   OUT     - name of the result variable
#
# Both strings are quoted when passed to string(FIND) so that an empty value
# or one containing `;` still arrives as exactly one argument.
function (find_isa CPUINFO TARGET OUT)
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(ISA_FOUND EQUAL -1)
        set(${OUT} OFF PARENT_SCOPE)
    else()
        set(${OUT} ON PARENT_SCOPE)
    endif()
endfunction()

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

# check_sysctl(<TARGET> <OUT>)
#
# Runs `sysctl -n <TARGET>` and sets the variable named by OUT, in the
# caller's scope, to ON when the command exits with status 0 and its
# whitespace-stripped output is "1" or compares greater than 0. In every
# other case OUT is set to OFF. The command's stderr is discarded.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        OUTPUT_VARIABLE SYSCTL_INFO
        RESULT_VARIABLE SYSCTL_RET
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)

    # Start from "not available" and flip only on a positive answer.
    set(_sysctl_state OFF)
    if(SYSCTL_RET EQUAL 0)
        if(SYSCTL_INFO STREQUAL "1" OR SYSCTL_INFO GREATER 0)
            set(_sysctl_state ON)
        endif()
    endif()
    set(${OUT} ${_sysctl_state} PARENT_SCOPE)
endfunction()

69
# Probe the host for the ISA features that drive the rest of this file.
#
# Apple Silicon is queried through sysctl; every other host is classified by
# searching the CPUINFO text. Results land in the *_FOUND variables.
if (MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is quoted so it is always passed as one argument, even when it
    # is empty or contains `;`.
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)

    # Check for RISC-V RVV support. A bare substring search for "v" matches
    # nearly any cpuinfo text, so look for `v` among the single-letter
    # extensions of an `isa : rv32…/rv64…` line instead. The character class
    # stops at the first `_`, so multi-letter extensions are not considered.
    if (CPUINFO MATCHES "isa[ \t]*:[ \t]*rv(32|64)[a-z]*v")
        set(RVV_FOUND ON)
    else()
        set(RVV_FOUND OFF)
    endif()

    # Support cross-compilation by allowing override via environment variables
    if (ENABLE_ARM_BF16)
        set(ARM_BF16_FOUND ON)
        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
    endif()
endif()

91
92
93
94
95
96
97
98
99
100
# Choose architecture-specific compile flags. Exactly one branch is taken, in
# this priority order: x86 (by processor name or ENABLE_X86_ISA), PowerPC,
# ARM (ASIMD), S390, RISC-V. Anything else aborts the configure step.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
    set(ENABLE_X86_ISA ON)
    if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
    endif()
    # x86 keeps two flag sets: both start from the common flags plus F16C,
    # then diverge into an AVX512/AMX set and an AVX2 set.
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")
    list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX2 ${CXX_COMPILE_FLAGS})
    list(APPEND CXX_COMPILE_FLAGS_AVX512
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq"
        "-mavx512bf16"
        "-mavx512vnni"
        "-mamx-bf16"
        "-mamx-tile")
    list(APPEND CXX_COMPILE_FLAGS_AVX2
        "-mavx2")
elseif (POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    # POWER9 is checked first; POWER11 reuses the power10 tuning flags.
    if (POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    elseif (POWER10_FOUND OR POWER11_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()
elseif (ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(ARM_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    else()
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif (S390_FOUND)
    message(STATUS "S390 detected")
    # Check for S390 VXE support
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    if(RVV_FOUND)
        # `FAIL_ERROR` is not a message() mode, so the keyword used to be
        # printed as part of a plain notice. Emit a proper warning and, as
        # before, continue without adding a -march flag.
        message(WARNING "Can't support rvv now.")
    else()
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires X86, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()

154

155
156
# Build oneDNN for GEMM kernels
#
# Enabled for x86, non-Apple ARM (ASIMD) and POWER9/10/11. On ARM the Arm
# Compute Library (ACL) is configured and built first, at configure time, so
# oneDNN can use it as a backend. The section ends by defining the `dnnl_ext`
# object library, appending it to LIBS and setting USE_ONEDNN.
if (ENABLE_X86_ISA OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
    if(ASIMD_FOUND)
        # Set number of parallel build processes
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            set(NPROC 4)
        endif()
        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
        # and create a local shim dir with it
        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

        find_library(OPEN_MP
            NAMES gomp
            PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
            NO_DEFAULT_PATH
            REQUIRED
        )
        # Set LD_LIBRARY_PATH to include the shim dir at build time to use the same libgomp as PyTorch
        if (OPEN_MP)
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
        endif()

        # Fetch and populate ACL
        # A directory named by the ACL_ROOT_DIR environment variable is used
        # as-is; otherwise the sources are downloaded and ACL_ROOT_DIR is
        # pointed at them for the steps below.
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG        v52.6.0
                GIT_SHALLOW    TRUE
                GIT_PROGRESS   TRUE
            )
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        endif()

        # Build ACL with CMake
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Both steps run inside the ACL source tree. The configure step's exit
        # status is checked too, so a broken configure is reported where it
        # happens instead of surfacing later as a build failure.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure step failed (exit ${_acl_config_rc}).")
        endif()
        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")

        # VLLM/oneDNN settings for ACL
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
        add_compile_definitions(VLLM_USE_ACL)
    endif()

    # A local oneDNN checkout can be supplied through the
    # FETCHCONTENT_SOURCE_DIR_ONEDNN environment variable; otherwise the
    # pinned tag is downloaded.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    else()
        message(STATUS "Downloading oneDNN from GitHub")
        FetchContent_Declare(
            oneDNN
            GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
            GIT_TAG v3.10
            GIT_PROGRESS TRUE
            GIT_SHALLOW TRUE
        )
    endif()

    # oneDNN build options: static library, inference workload, and only the
    # MATMUL and REORDER primitives.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_BUILD_DOC "OFF")
    set(ONEDNN_BUILD_EXAMPLES "OFF")
    set(ONEDNN_BUILD_TESTS "OFF")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
    set(ONEDNN_BUILD_GRAPH "OFF")
    set(ONEDNN_ENABLE_JIT_PROFILING "ON")
    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
    set(ONEDNN_ENABLE_MAX_CPU_ISA "ON")
    set(ONEDNN_ENABLE_CPU_ISA_HINTS "ON")
    set(ONEDNN_VERBOSE "ON")
    # With CMP0077 defaulting to NEW, option() calls in the sub-project honor
    # the normal variables set above.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # TODO: Refactor this
    if (ENABLE_X86_ISA)
        # Note: only enable oneDNN for AVX512
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512})
    else()
        list(APPEND DNNL_COMPILE_FLAGS ${CXX_COMPILE_FLAGS})
    endif()

    # Temporarily switch to Release while oneDNN is added, then restore the
    # caller's build type.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})

    # Helper object library that wraps oneDNN for the extension targets.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_include_directories(
        dnnl_ext
        PUBLIC ${oneDNN_SOURCE_DIR}/include
        PUBLIC ${oneDNN_BINARY_DIR}/include
        PRIVATE ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl torch)
    target_compile_options(dnnl_ext PRIVATE ${DNNL_COMPILE_FLAGS} -fPIC)
    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
endif()

295
296
297
298
299
300
301
# TODO: Refactor this
# Print the flag set(s) in effect: one list for non-x86 builds, separate
# AVX512 and AVX2 lists for x86 builds.
if (NOT ENABLE_X86_ISA)
    message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
else()
    message(STATUS "CPU extension (AVX512) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
    message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
endif()
302

303
304
305
306
307
308
# Link against libnuma when NUMA support is enabled; otherwise define
# VLLM_NUMA_DISABLED for the sources.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes the bare macro name. A leading `-D` is
    # only stripped by CMake 3.26 and newer, so it is not passed here.
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
309

310
311
312
313
314
315
316
317
318
319
320
321
322
#
# Generate CPU attention dispatch header
#
# The generator script runs once at configure time with csrc/cpu as its
# working directory; a non-zero exit status aborts configuration.
message(STATUS "Generating CPU attention dispatch header")
execute_process(
    COMMAND
        ${Python_EXECUTABLE}
        ${CMAKE_SOURCE_DIR}/csrc/cpu/generate_cpu_attn_dispatch.py
    RESULT_VARIABLE GEN_RESULT
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/csrc/cpu
)
if(NOT GEN_RESULT EQUAL 0)
    message(FATAL_ERROR "Failed to generate CPU attention dispatch header")
endif()

323
324
325
326
327
#
# _C extension
#
# Baseline source list for the non-x86 `_C` target. Later sections prepend
# optional sources depending on the detected platform.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    "csrc/cpu/cpu_attn.cpp"
    "csrc/cpu/torch_bindings.cpp"
)
335

336
337
338
339
340
341
# Non-Apple ARM builds also compile shm.cpp; it goes to the front of the
# source list.
if (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/shm.cpp")
endif()

342
# When oneDNN is in use, its kernel wrappers are compiled too; the file goes
# to the front of the source list.
if(USE_ONEDNN)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/dnnl_kernels.cpp")
endif()
347

348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# Define the extension module(s).
#
# x86 builds produce two modules from separate source lists and flag sets:
# `_C` (AVX512/AMX) and `_C_AVX2`. Every other architecture produces a single
# `_C` module from VLLM_EXT_SRC and CXX_COMPILE_FLAGS.
if (ENABLE_X86_ISA)
    # Sources compiled with the AVX512 flag set.
    set(VLLM_EXT_SRC_AVX512
        "csrc/cpu/sgl-kernels/gemm.cpp"
        "csrc/cpu/sgl-kernels/gemm_int8.cpp"
        "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
        "csrc/cpu/sgl-kernels/moe.cpp"
        "csrc/cpu/sgl-kernels/moe_int8.cpp"
        "csrc/cpu/sgl-kernels/moe_fp8.cpp"
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp"
        "csrc/cpu/cpu_fused_moe.cpp"
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/dnnl_kernels.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    )

    # Sources compiled with the AVX2 flag set.
    set(VLLM_EXT_SRC_AVX2
        "csrc/cpu/utils.cpp"
        "csrc/cpu/cpu_attn.cpp"
        "csrc/cpu/torch_bindings.cpp"
        # TODO: Remove these files
        "csrc/cpu/activation.cpp"
        "csrc/cpu/layernorm.cpp"
        "csrc/cpu/mla_decode.cpp"
        "csrc/cpu/pos_encoding.cpp"
        "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    )

    message(STATUS "CPU extension (AVX512) source files: ${VLLM_EXT_SRC_AVX512}")
    message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")

    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX512}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
        USE_SABI 3
        WITH_SOABI
    )

    # CPU_CAPABILITY_AVX512 is for the SGL kernels, CPU_CAPABILITY_AMXBF16
    # for the AMX kernels.
    target_compile_definitions(_C PRIVATE
        "-DCPU_CAPABILITY_AVX512"
        "-DCPU_CAPABILITY_AMXBF16")

    define_extension_target(
        _C_AVX2
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC_AVX2}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
        USE_SABI 3
        WITH_SOABI
    )
else()
    message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")

    #
    # Single extension target for non-x86 builds
    #
    define_extension_target(
        _C
        DESTINATION vllm
        LANGUAGE CXX
        SOURCES ${VLLM_EXT_SRC}
        LIBRARIES ${LIBS}
        COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
        USE_SABI 3
        WITH_SOABI
    )
endif()
426

427
# Configure-time status notice that the C extension is being enabled.
message(STATUS "Enabling C extension.")