Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6b9a4bd5
Commit
6b9a4bd5
authored
Apr 23, 2024
by
Jun Liu
Browse files
Merge branch 'amd-develop-staging-0423' into amd-master
parents
56de337f
c5f1cdf7
Changes
364
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
232 additions
and
38 deletions
+232
-38
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp
...scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp
+57
-0
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp
...scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp
+57
-0
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp
...scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp
+57
-0
library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt
...r_operation_instance/gpu/contraction_scale/CMakeLists.txt
+45
-38
library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
...c/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
...peration_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
...tion_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt
...sor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt
...src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt
...eration_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt
...r_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt
...r_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
...ensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt
...n_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt
...ensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
...peration_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt
...ensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt
+1
-0
library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt
...ensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt
+1
-0
No files found.
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// This (ifndef) is a hack to use customized behavior for buffer load rather than using the
// default setting. Don't use this hack unless absolutely necessary!
// FIXME: make the behavior of buffer load a configurable (template) parameter of each device op
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 1
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Instance list for the f64 "scale" contraction, m6_n6_k6 / "knn" variant.
//
// The alias instantiates device_contraction_f64_kn_instance with F64 in every data-type
// position, Empty_Tuple in the fifth position, PassThrough / PassThrough / Scale as the three
// operation arguments, and 6 as the trailing integer argument.
//
// NOTE(review): Empty_Tuple presumably means "no D tensors" and the trailing 6 presumably is the
// number of dimensions per M/N/K group (matching the <6, 6, 6, ...> interface below) -- confirm
// against device_contraction_instance.hpp.
// NOTE(review): "knn" presumably names the fast changing dimension of A/B/E (k/n/n) -- confirm.
using device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance =
    device_contraction_f64_kn_instance<F64,
                                       F64,
                                       F64,
                                       F64,
                                       Empty_Tuple,
                                       F64,
                                       F64,
                                       PassThrough,
                                       PassThrough,
                                       Scale,
                                       6>;

// Forwards `instances` together with a default-constructed
// device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance to
// add_device_operation_instances.
//
// instances: caller-owned vector of owning pointers to the
//            DeviceContractionMultipleD<6, 6, 6, ...> interface; taken by non-const reference.
//            Presumably the helper appends to it -- verify in add_device_operation_instance.hpp.
void add_device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance(
    std::vector<std::unique_ptr<DeviceContractionMultipleD<6,
                                                           6,
                                                           6,
                                                           F64,
                                                           F64,
                                                           Empty_Tuple,
                                                           F64,
                                                           PassThrough,
                                                           PassThrough,
                                                           Scale,
                                                           F64>>>& instances)
{
    add_device_operation_instances(
        instances, device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// This (ifndef) is a hack to use customized behavior for buffer load rather than using the
// default setting. Don't use this hack unless absolutely necessary!
// FIXME: make the behavior of buffer load a configurable (template) parameter of each device op
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 1
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Instance list for the f64 "scale" contraction, m6_n6_k6 / "mkn" variant.
//
// The alias instantiates device_contraction_f64_mk_instance with F64 in every data-type
// position, Empty_Tuple in the fifth position, PassThrough / PassThrough / Scale as the three
// operation arguments, and 6 as the trailing integer argument.
//
// NOTE(review): Empty_Tuple presumably means "no D tensors" and the trailing 6 presumably is the
// number of dimensions per M/N/K group (matching the <6, 6, 6, ...> interface below) -- confirm
// against device_contraction_instance.hpp.
// NOTE(review): "mkn" presumably names the fast changing dimension of A/B/E (m/k/n) -- confirm.
using device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance =
    device_contraction_f64_mk_instance<F64,
                                       F64,
                                       F64,
                                       F64,
                                       Empty_Tuple,
                                       F64,
                                       F64,
                                       PassThrough,
                                       PassThrough,
                                       Scale,
                                       6>;

// Forwards `instances` together with a default-constructed
// device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance to
// add_device_operation_instances.
//
// instances: caller-owned vector of owning pointers to the
//            DeviceContractionMultipleD<6, 6, 6, ...> interface; taken by non-const reference.
//            Presumably the helper appends to it -- verify in add_device_operation_instance.hpp.
void add_device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance(
    std::vector<std::unique_ptr<DeviceContractionMultipleD<6,
                                                           6,
                                                           6,
                                                           F64,
                                                           F64,
                                                           Empty_Tuple,
                                                           F64,
                                                           PassThrough,
                                                           PassThrough,
                                                           Scale,
                                                           F64>>>& instances)
{
    add_device_operation_instances(
        instances, device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/contraction_scale/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp
0 → 100644
View file @
6b9a4bd5
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
// This (ifndef) is a hack to use customized behavior for buffer load rather than using the
// default setting. Don't use this hack unless absolutely necessary!
// FIXME: make the behavior of buffer load a configurable (template) parameter of each device op
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK 1
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Instance list for the f64 "scale" contraction, m6_n6_k6 / "mnn" variant.
//
// The alias instantiates device_contraction_f64_mn_instance with F64 in every data-type
// position, Empty_Tuple in the fifth position, PassThrough / PassThrough / Scale as the three
// operation arguments, and 6 as the trailing integer argument.
//
// NOTE(review): Empty_Tuple presumably means "no D tensors" and the trailing 6 presumably is the
// number of dimensions per M/N/K group (matching the <6, 6, 6, ...> interface below) -- confirm
// against device_contraction_instance.hpp.
// NOTE(review): "mnn" presumably names the fast changing dimension of A/B/E (m/n/n) -- confirm.
using device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance =
    device_contraction_f64_mn_instance<F64,
                                       F64,
                                       F64,
                                       F64,
                                       Empty_Tuple,
                                       F64,
                                       F64,
                                       PassThrough,
                                       PassThrough,
                                       Scale,
                                       6>;

// Forwards `instances` together with a default-constructed
// device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance to
// add_device_operation_instances.
//
// instances: caller-owned vector of owning pointers to the
//            DeviceContractionMultipleD<6, 6, 6, ...> interface; taken by non-const reference.
//            Presumably the helper appends to it -- verify in add_device_operation_instance.hpp.
void add_device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance(
    std::vector<std::unique_ptr<DeviceContractionMultipleD<6,
                                                           6,
                                                           6,
                                                           F64,
                                                           F64,
                                                           Empty_Tuple,
                                                           F64,
                                                           PassThrough,
                                                           PassThrough,
                                                           Scale,
                                                           F64>>>& instances)
{
    add_device_operation_instances(
        instances, device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
set
(
DEVICE_CONTRACTION_SCALE_INSTANCES
)
set
(
DEVICE_CONTRACTION_SCALE_INSTANCES
)
# FP32
list
(
APPEND DIMS 2 6
)
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp
foreach
(
idx IN LISTS DIMS
)
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp
set
(
PREFIX
${
idx
}
D/device_contraction_scale_m
${
idx
}
_n
${
idx
}
_k
${
idx
}
)
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp
)
# FP32
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp
)
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp
)
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp
)
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp
)
# FP64
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp
)
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp
)
# FP64
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp
)
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp
)
# FP16
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp
)
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp
)
# FP16
# BF16
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp
)
device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp
)
# BF16
list
(
APPEND DEVICE_CONTRACTION_SCALE_INSTANCES
${
PREFIX
}
_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp
${
PREFIX
}
_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp
)
endforeach
()
add_instance_library
(
device_contraction_scale_instance
${
DEVICE_CONTRACTION_SCALE_INSTANCES
}
)
add_instance_library
(
device_contraction_scale_instance
${
DEVICE_CONTRACTION_SCALE_INSTANCES
}
)
library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_conv1d_bwd_data_instance
add_instance_library
(
device_conv1d_bwd_data_instance
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp
device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_AND_DL_KERNELS
set
(
CONV2D_BWD_DATA_INSTANCES
)
set
(
CONV2D_BWD_DATA_INSTANCES
)
list
(
APPEND CONV2D_BWD_DATA_INSTANCES device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
list
(
APPEND CONV2D_BWD_DATA_INSTANCES device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp
device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
set
(
DEVICE_CONV2D_FWD_INSTANCES
)
set
(
DEVICE_CONV2D_FWD_INSTANCES
)
list
(
APPEND DEVICE_CONV2D_FWD_INSTANCES device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
list
(
APPEND DEVICE_CONV2D_FWD_INSTANCES device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp
device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_conv2d_fwd_bias_relu_instance
add_instance_library
(
device_conv2d_fwd_bias_relu_instance
device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp
device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp
)
)
library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_conv2d_fwd_bias_relu_add_instance
add_instance_library
(
device_conv2d_fwd_bias_relu_add_instance
device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp
device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp
)
)
...
...
library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_conv3d_bwd_data_instance
add_instance_library
(
device_conv3d_bwd_data_instance
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp
device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_instance
add_instance_library
(
device_gemm_add_instance
device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_add_fastgelu_instance
add_instance_library
(
device_gemm_add_add_fastgelu_instance
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_fastgelu_instance
add_instance_library
(
device_gemm_add_fastgelu_instance
device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_multiply_instance
add_instance_library
(
device_gemm_add_multiply_instance
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_relu_instance
add_instance_library
(
device_gemm_add_relu_instance
device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_relu_add_layernorm_instance
add_instance_library
(
device_gemm_add_relu_add_layernorm_instance
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp
device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_add_silu_instance
add_instance_library
(
device_gemm_add_silu_instance
device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp
device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_bias_add_reduce_instance
add_instance_library
(
device_gemm_bias_add_reduce_instance
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_AND_WMMA_KERNELS
add_instance_library
(
device_gemm_bilinear_instance
add_instance_library
(
device_gemm_bilinear_instance
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp
device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp
...
...
library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt
View file @
6b9a4bd5
# ONLY XDL_KERNELS
add_instance_library
(
device_gemm_fastgelu_instance
add_instance_library
(
device_gemm_fastgelu_instance
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp
device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp
...
...
Prev
1
…
9
10
11
12
13
14
15
16
17
…
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment