# ckProfiler
#
# Source files compiled into the profiler executable.
# Only profiler.cpp plus the GEMM and split-K GEMM front-ends are active here;
# every other profile_*.cpp entry is kept as a comment so it can be switched
# back on one at a time.
# NOTE(review): the disabled entries look like a deliberate trim of the build;
# confirm before re-enabling or deleting them.
set(PROFILER_SOURCES
    profiler.cpp
    profile_gemm.cpp
    profile_gemm_splitk.cpp
    #profile_gemm_bias_add_reduce.cpp
    #profile_gemm_add_multiply.cpp
    #profile_gemm_multiply_add.cpp
    #profile_gemm_reduce.cpp
    #profile_batched_gemm.cpp
    #profile_batched_gemm_reduce.cpp
    #profile_conv_fwd.cpp
    #profile_conv_fwd_bias_relu.cpp
    #profile_conv_fwd_bias_relu_add.cpp
    #profile_conv_bwd_data.cpp
    #profile_grouped_conv_fwd.cpp
    #profile_grouped_conv_bwd_weight.cpp
    #profile_reduce.cpp
    #profile_groupnorm.cpp
    #profile_layernorm.cpp
    #profile_max_pool3d_fwd.cpp
    #profile_avg_pool3d_bwd.cpp
    #profile_max_pool3d_bwd.cpp
    #profile_softmax.cpp
    #profile_batchnorm_fwd.cpp
    #profile_batchnorm_bwd.cpp
    #profile_batchnorm_infer.cpp
    #profile_grouped_conv_bwd_data.cpp
    #profile_conv_tensor_rearrange.cpp
)
# Optional profiler front-ends.
# The DL_KERNELS / DTYPES guards in this section are themselves commented out,
# so they are inert: the only live statement is the grouped-GEMM append, and it
# executes regardless of DTYPES.
#if(DL_KERNELS)
  #list(APPEND PROFILER_SOURCES profile_batched_gemm_multi_d.cpp)
#endif()
#if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
  #list(APPEND PROFILER_SOURCES profile_batched_gemm_gemm.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_fastgelu.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_streamk.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_bilinear.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_add_fastgelu.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_add_add_fastgelu.cpp)
  #list(APPEND PROFILER_SOURCES profile_gemm_add_relu_add_layernorm.cpp)
  #list(APPEND PROFILER_SOURCES profile_batched_gemm_add_relu_gemm_add.cpp)
  # NOTE(review): this append sits textually inside the fp16 section, but the
  # surrounding if()/endif() are comments, so it is unconditional. The matching
  # device_grouped_gemm_instance link further down is unguarded in the same
  # way. Confirm that is intended before restoring either guard; they must be
  # restored together.
  list(APPEND PROFILER_SOURCES profile_grouped_gemm.cpp)
  #list(APPEND PROFILER_SOURCES profile_grouped_gemm_fastgelu.cpp)
#endif()

#if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
  #list(APPEND PROFILER_SOURCES profile_contraction_bilinear.cpp)
  #list(APPEND PROFILER_SOURCES profile_contraction_scale.cpp)
#endif()

# Name of the profiler target; every command below refers to it through this
# variable.
set(PROFILER_EXECUTABLE ckProfiler)

# Build the executable from the source list assembled above.
add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES})

# Turn off the global-constructors warning for this target only.
target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)

# Libraries that are always linked. The order matches the original three
# separate target_link_libraries() calls.
target_link_libraries(${PROFILER_EXECUTABLE}
    PRIVATE
        utility
        device_gemm_instance
        device_gemm_splitk_instance
)
# Link lines for the instance libraries that are currently disabled.
# They are kept as comments so they can be re-enabled individually.
# NOTE(review): each one presumably pairs with a profile_*.cpp entry that is
# commented out in the source list; confirm the pairing when re-enabling.
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_reduce_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bias_add_reduce_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_reduce_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_fwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_fwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_fwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv1d_bwd_data_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_bwd_data_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv3d_bwd_data_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv1d_bwd_weight_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_weight_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_weight_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_conv2d_fwd_bias_relu_add_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_normalization_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_softmax_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_reduce_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batchnorm_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_pool3d_fwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_avg_pool3d_bwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_max_pool_bwd_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_data_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_data_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_image_to_column_instance)
#target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_instance)

#if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_bilinear_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_scale_instance)
#endif()

#if(DL_KERNELS)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_multi_d_instance)
#endif()

#if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_fastgelu_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_relu_add_layernorm_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_bilinear_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_add_fastgelu_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_streamk_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_fastgelu_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_gemm_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_add_relu_gemm_add_instance)
  # NOTE(review): the only live link in this section. The fp16 if()/endif()
  # around it are comments, so it is applied unconditionally, mirroring the
  # unconditional profile_grouped_gemm.cpp source append earlier in this file.
  # Keep the two in sync if the guard is ever restored.
  target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_instance)
  #target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_gemm_fastgelu_instance)
#endif()

# Register the profiler binary for installation under the "profiler" component.
# rocm_install() is not defined in this file; it is expected to be provided by
# the including project.
rocm_install(TARGETS ${PROFILER_EXECUTABLE} COMPONENT profiler)