CMakeLists.txt 1.65 KB
Newer Older
Li Zhang's avatar
Li Zhang committed
1
2
3
4
5
6
# Copyright (c) OpenMMLab. All rights reserved.

# This file calls find_package(CUDAToolkit) and links CUDA::cudart. Both come
# from the FindCUDAToolkit module, which first shipped with CMake 3.17, so
# that is the real minimum version for this directory.
cmake_minimum_required(VERSION 3.17)

# Process the CMakeLists.txt found in the fused_multi_head_attention/
# subdirectory before the targets of this directory are defined.
add_subdirectory(fused_multi_head_attention)

Chen Xin's avatar
Chen Xin committed
7
8
# Locate the CUDA toolkit. REQUIRED makes configuration fail if it is missing.
# The CUDA::cudart imported target linked by Llama and llama_gemm is defined
# by this module.
find_package(CUDAToolkit REQUIRED)

AllentDan's avatar
AllentDan committed
9
# Static library "Llama", built from an explicit list of C++ (.cc) and
# CUDA (.cu) sources located in this directory.
add_library(Llama STATIC
    LlamaV2.cc
    LlamaBatch.cc
    BlockManager.cc
    SequenceManager.cc
    LlamaWeight.cc
    LlamaDecoderLayerWeight.cc
    LlamaFfnLayer.cc
    unified_decoder.cc
    unified_attention_layer.cc
    llama_kernels.cu
    llama_decoder_kernels.cu
    llama_utils.cu
)

# POSITION_INDEPENDENT_CODE: compile the objects as position-independent code.
# CUDA_RESOLVE_DEVICE_SYMBOLS: run the CUDA device-link step on this static
# library itself rather than deferring it to whatever links the library.
set_target_properties(Llama PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
)
Chen Xin's avatar
Chen Xin committed
24
# Link dependencies of Llama. They are PUBLIC, so every target that links
# Llama also links these libraries. The order is kept as originally listed.
target_link_libraries(Llama
    PUBLIC
        CUDA::cudart
        gemm_s4_f16
        cublasMMWrapper
        DynamicDecodeLayer
        activation_kernels
        decoder_masked_multihead_attention
        decoder_multihead_attention
        bert_preprocess_kernels
        decoding_kernels
        unfused_attention_kernels
        custom_ar_kernels
        custom_ar_comm
        gpt_kernels
        tensor
        memory_utils
        nccl_utils
        cuda_utils
        logger
        llama_fmha
)

q.yao's avatar
q.yao committed
44
45
46
47
48
# The flash_attention2 subdirectory is configured, and its target linked into
# Llama, on every toolchain except MSVC.
# NOTE(review): presumably flash_attention2 does not build with MSVC - confirm.
if(NOT MSVC)
    add_subdirectory(flash_attention2)
    target_link_libraries(Llama
        PUBLIC
            flash_attention2
    )
endif()

Li Zhang's avatar
Li Zhang committed
49
# Stand-alone executable built from llama_gemm.cc.
add_executable(llama_gemm llama_gemm.cc)

# An executable has no consumers that inherit its link interface, so its
# dependencies are linked PRIVATE.
target_link_libraries(llama_gemm
    PRIVATE
        CUDA::cudart
        gpt_gemm_func
        memory_utils
        cuda_utils
        logger
)
Li Zhang's avatar
Li Zhang committed
51

52
# Install the llama_gemm binary into lmdeploy/bin under the top-level source
# directory. The path is quoted so it always reaches install() as one argument.
# NOTE(review): the destination is an absolute path inside the source tree, so
# CMAKE_INSTALL_PREFIX does not affect where this target lands - confirm this
# is intended.
install(
    TARGETS llama_gemm
    DESTINATION "${CMAKE_SOURCE_DIR}/lmdeploy/bin"
)
Li Zhang's avatar
Li Zhang committed
53
54
55
56
57
58

# Optional unit test. Catch2 (version 3 requested) is looked up quietly; when
# it is not found, no message is printed and the test executable is skipped.
find_package(Catch2 3 QUIET)
if(Catch2_FOUND)
    add_executable(test_cache_manager
        test_cache_manager.cc
    )
    target_link_libraries(test_cache_manager
        PRIVATE
            Llama
            Catch2::Catch2WithMain
    )
endif()