CMakeLists.txt 1.89 KB
Newer Older
Li Zhang's avatar
Li Zhang committed
1
2
3
4
# Copyright (c) OpenMMLab. All rights reserved.

# target_link_directories(), used further down in this file, was introduced in
# CMake 3.13. Declaring that version here makes older CMake fail immediately
# with a clear version error instead of an "Unknown CMake command" later on.
cmake_minimum_required(VERSION 3.13)

xiabo's avatar
xiabo committed
5
#add_subdirectory(fused_multi_head_attention)
Li Zhang's avatar
Li Zhang committed
6

xiabo's avatar
xiabo committed
7
8
# Locate CUDA through the legacy FindCUDA module; the CUDAToolkit lookup below
# is currently commented out.
# NOTE(review): FindCUDA has been deprecated upstream since CMake 3.10 —
# presumably it is kept for the ROCm/DCU toolchain this file appears to target
# (see the rocblas link further down); confirm before switching back.
#find_package(CUDAToolkit REQUIRED)
find_package(CUDA REQUIRED)
Chen Xin's avatar
Chen Xin committed
9

AllentDan's avatar
AllentDan committed
10
# Static library "Llama", built from the host (.cc) and device (.cu) sources
# that live next to this CMakeLists.txt.
add_library(Llama STATIC
    # C++ sources
    LlamaV2.cc
    LlamaBatch.cc
    BlockManager.cc
    SequenceManager.cc
    LlamaWeight.cc
    LlamaDecoderLayerWeight.cc
    LlamaFfnLayer.cc
    unified_decoder.cc
    unified_attention_layer.cc
    # CUDA sources
    llama_kernels.cu
    llama_decoder_kernels.cu
    llama_utils.cu
)
xiabo's avatar
xiabo committed
24
25
26
27
# Append -fPIC to the C++ and CUDA compile flags.
# CMAKE_<LANG>_FLAGS are directory-scoped variables, so the flag applies to
# every C++/CUDA target configured in this directory, not only to Llama.
string(APPEND CMAKE_CXX_FLAGS " -fPIC")
string(APPEND CMAKE_CUDA_FLAGS " -fPIC")
# The per-target property form is currently commented out:
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE  ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS  ON)
28
# Extra library search path for Llama and, because of PUBLIC, for anything
# that links against it. The relative path is interpreted by CMake relative to
# this directory's source dir.
target_link_directories(Llama
    PUBLIC
        ../../../../3rdparty/composable_kernel/
)
xiabo's avatar
xiabo committed
29
# Libraries Llama depends on. PUBLIC propagates them to every consumer of
# Llama. The order is kept exactly as it was, since link order can matter
# when resolving symbols between static libraries.
target_link_libraries(Llama
    PUBLIC
        cudart
        gemm_s4_f16
        cublasMMWrapper
        DynamicDecodeLayer
        activation_kernels
        decoder_masked_multihead_attention
        decoder_multihead_attention
        bert_preprocess_kernels
        decoding_kernels
        unfused_attention_kernels
        custom_ar_kernels
        custom_ar_comm
        gpt_kernels
        tensor
        memory_utils
        nccl_utils
        cuda_utils
        logger
        gemm_multiB_int4
)
xiabo's avatar
xiabo committed
48
#        llama_fmha)
Li Zhang's avatar
Li Zhang committed
49

q.yao's avatar
q.yao committed
50
# Guard for the flash_attention2 sub-project on non-MSVC toolchains.
# Both statements inside are commented out, so this block is currently a no-op.
if(NOT MSVC)
    # add_subdirectory(flash_attention2)
    # target_link_libraries(Llama PUBLIC flash_attention2)
endif()


Li Zhang's avatar
Li Zhang committed
55
# llama_gemm: standalone executable built from llama_gemm.cc and installed
# into <top-level source dir>/lmdeploy/bin.
add_executable(llama_gemm
    llama_gemm.cc
)
target_link_libraries(llama_gemm
    PUBLIC
        -lrocblas
        cudart
        gpt_gemm_func
        memory_utils
        cuda_utils
        logger
)
install(
    TARGETS llama_gemm
    DESTINATION ${CMAKE_SOURCE_DIR}/lmdeploy/bin
)
Li Zhang's avatar
Li Zhang committed
58
59
60
61
62
63

# Optional unit test: test_cache_manager is only built when Catch2 v3 can be
# found. QUIET suppresses the not-found message, so a missing Catch2 simply
# skips the test target.
find_package(Catch2 3 QUIET)
if(Catch2_FOUND)
    add_executable(test_cache_manager
        test_cache_manager.cc
    )
    target_link_libraries(test_cache_manager
        PRIVATE
            Llama
            Catch2::Catch2WithMain
    )
endif()